From 27a9f5f41c8e23af7595060f0abb8344c9c51e19 Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Tue, 4 Feb 2025 10:09:06 +0100 Subject: [PATCH 01/16] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20Improve=20get=5Fresu?= =?UTF-8?q?lt()?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- R/get_result.R | 2 +- R/get_result_character.R | 105 +++++++++++++++++++------------------ R/get_result_n2k_inla.R | 2 +- R/get_result_n2kmanifest.R | 81 +++++----------------------- R/get_result_s3_object.R | 29 +++++++++- man/get_result.Rd | 34 ++++++------ 6 files changed, 113 insertions(+), 140 deletions(-) diff --git a/R/get_result.R b/R/get_result.R index f8583d20..1a068e30 100644 --- a/R/get_result.R +++ b/R/get_result.R @@ -8,7 +8,7 @@ #' @importFrom methods setGeneric setGeneric( name = "get_result", - def = function(x, ...) { + def = function(x, base, ...) { standardGeneric("get_result") # nocov } ) diff --git a/R/get_result_character.R b/R/get_result_character.R index df007caf..48b4da7d 100644 --- a/R/get_result_character.R +++ b/R/get_result_character.R @@ -1,65 +1,70 @@ #' @rdname get_result #' @importFrom methods setMethod validObject new -#' @importFrom assertthat assert_that is.string is.count +#' @importFrom assertthat assert_that is.string noNA #' @importFrom utils file_test -#' @param n_cluster The number of clusters to run this function in parallel. -#' Defaults to `1` (= no parallel computing). setMethod( f = "get_result", - signature = signature(x = "character"), - definition = function( - x, - n_cluster = 1, - verbose = TRUE, - ... - ) { + signature = signature(x = "character", base = "character"), + definition = function(x, base, ..., project, verbose = TRUE) { # check arguments - assert_that(is.string(x)) - assert_that(is.count(n_cluster)) + assert_that( + is.string(x), is.string(base), is.string(project), noNA(x), noNA(base), + noNA(project) + ) + stopifnot("`base` is not a existing directory" = file_test("-d", base)) + + target <- file.path(base, project, "results", sprintf("%s.rds", x)) + dirname(target) |> + dir.create(showWarnings = FALSE, recursive = TRUE) # x is an existing file - if (file_test("-f", x)) { - display(verbose, x) - return(get_result(x = readRDS(x), verbose = verbose, ...)) + if (file_test("-f", target)) { + display(verbose = verbose, paste(" already extracted", x)) + return(readRDS(target)) } + display(verbose = verbose, paste(" extracting", x)) - if (!file_test("-d", x)) { - stop("'x' is neither an existing file, neither an existing directory") + read_model(x = x, base = base, project = project) |> + get_result( + base = base, project = project, ..., verbose = verbose + ) -> result + if (status(result) == "converged") { + saveRDS(result, file = target) } + return(result) + } +) - # x is an existing directory - x <- normalizePath(x, winslash = "/", mustWork = TRUE) - files <- list.files( - path = x, - pattern = "\\.rds$", - full.names = TRUE, - recursive = TRUE - ) - if (length(files) == 0) { - return(new("n2kResult")) - } - if (n_cluster == 1 || !requireNamespace("parallel", quietly = TRUE)) { - result <- lapply(files, get_result, verbose = verbose, ...) - } else { - n_cluster <- min(n_cluster, parallel::detectCores()) - display( - verbose, - paste("Reading results in parallel on", n_cluster, "clusters") - ) - cl <- parallel::makeCluster(n_cluster) - result <- parallel::clusterApplyLB( - cl = cl, - x = files, - fun = get_result, - verbose = verbose, - ... 
- ) - parallel::stopCluster(cl) +#' @rdname get_result +#' @importFrom methods setMethod validObject new +#' @importFrom assertthat assert_that is.string noNA +#' @importFrom utils file_test +setMethod( + f = "get_result", + signature = signature(x = "character", base = "s3_bucket"), + definition = function(x, base, ..., project, verbose = TRUE) { + # check arguments + assert_that(is.string(x), is.string(project), noNA(x), noNA(project)) + target <- sprintf("%s/results/%s.rds", project, x) + result <- get_bucket(bucket = base, prefix = target, max = 1) + if (length(result) == 1) { + display(verbose = verbose, paste(" already extracted", x)) + return(s3readRDS(result$Contents)) } - - display(verbose, "Combining results") - result <- do.call(combine, result) - - return(result) + stopifnot(length(result) == 0) + display(verbose = verbose, paste(" extracting", x)) + substring(x, 1, 4) |> + sprintf(fmt = "%2$s/%1$s", project) |> + get_bucket(bucket = base, max = Inf) -> available + available <- available[ + map_chr(available, "Key") |> + grepl(pattern = x) + ] + stopifnot( + "object not found or multiple objects found" = length(available) == 1 + ) + get_result( + available[[1]], base = base, project = project, verbose = verbose, ... + ) } ) diff --git a/R/get_result_n2k_inla.R b/R/get_result_n2k_inla.R index 8acc855e..8e1d7d28 100644 --- a/R/get_result_n2k_inla.R +++ b/R/get_result_n2k_inla.R @@ -11,7 +11,7 @@ setMethod( f = "get_result", signature = signature(x = "n2kInla"), - definition = function(x, verbose = TRUE, ...) { + definition = function(x, base, ..., verbose = TRUE) { validObject(x) anomaly <- get_anomaly(analysis = x, verbose = verbose, ...) if (is.null(x@LinearCombination)) { diff --git a/R/get_result_n2kmanifest.R b/R/get_result_n2kmanifest.R index b7cb219d..db10ddbc 100644 --- a/R/get_result_n2kmanifest.R +++ b/R/get_result_n2kmanifest.R @@ -7,74 +7,17 @@ setMethod( f = "get_result", signature = signature(x = "n2kManifest"), - definition = function(x, ..., base, project, verbose = TRUE) { - assert_that( - inherits(base, "s3_bucket"), is.string(project), noNA(project), - validObject(x), is.flag(verbose), noNA(verbose) - ) - manifest <- order_manifest(manifest = x) - data.frame( - object = manifest, - status = map_chr(manifest, get_result_s3, base = base, project = project) - ) + definition = function(x, base, ..., verbose = TRUE) { + assert_that(validObject(x)) + display(verbose = verbose, paste("Handle manifest", x@Fingerprint)) + order_manifest(manifest = x) |> + vapply( + FUN = function(hash, base, verbose, ...) { + list(get_result(x = hash, base = base, ..., verbose = verbose)) + }, + FUN.VALUE = vector(mode = "list", length = 1), base = base, + verbose = verbose, ... 
+ ) |> + do.call(what = combine) } ) - -#' @importFrom assertthat assert_that -#' @importFrom aws.s3 get_bucket s3saveRDS -#' @importFrom purrr map_chr -get_result_s3 <- function(hash, base, project, verbose = TRUE) { - display(verbose = verbose, paste(Sys.time(), hash), linefeed = FALSE) - - target <- sprintf("%s/results/%s.rds", project, hash) - if (length(get_bucket(bucket = base, prefix = target, max = 1)) > 0) { - display(verbose = verbose, " already done") - return("converged") - } - substring(hash, 1, 4) |> - sprintf(fmt = "%2$s/%1$s", project) |> - get_bucket(bucket = base, max = Inf) |> - map_chr("Key") -> available - available <- available[grepl(hash, available)] - if (length(available) != 1) { - display(verbose = verbose, " object not found or multiple objects found") - return("object problem") - } - substring(hash, 1, 4) |> - sprintf(fmt = "%2$s/%1$s/(\\w+)/%3$s.rds", project, hash) |> - gsub(replacement = "\\1", available) -> hash_status - display(verbose = verbose, sprintf(" %s", hash_status)) - if (hash_status != "converged") { - return(hash_status) - } - display(verbose = verbose, " downloading object", FALSE) - x <- read_model(x = hash, base = base, project = project) - display(verbose = verbose, " done") - x <- try(get_result(x)) - if (inherits(x, "try-error")) { - return("get_result() failed") - } - - # try several times to write to S3 bucket - # avoids errors due to time out - i <- 1 - repeat { - bucket_ok <- tryCatch( - s3saveRDS(x, bucket = base, object = target, multipart = TRUE), - error = function(err) { - err - } - ) - if (is.logical(bucket_ok)) { - break - } - stopifnot("Unable to write to S3 bucket" = i <= 10) - message("attempt ", i, " to write to S3 bucket failed. Trying again...") - i <- i + 1 - # waiting time between tries increases with the number of tries - Sys.sleep(i) - } - rm(x) - gc(verbose = FALSE) - return("converged") -} diff --git a/R/get_result_s3_object.R b/R/get_result_s3_object.R index 743799e9..d9d1b9aa 100644 --- a/R/get_result_s3_object.R +++ b/R/get_result_s3_object.R @@ -4,8 +4,33 @@ setMethod( f = "get_result", signature = signature(x = "s3_object"), - definition = function(x, ...) { + definition = function(x, base, ..., project, verbose = TRUE) { x <- s3readRDS(object = x) - get_result(x, ...) + result <- get_result(x, ..., project = project, verbose = verbose) + if (status(result) == "converged") { + target <- sprintf("%s/results/%s.rds", project, get_file_fingerprint(x)) + # try several times to write to S3 bucket + # avoids errors due to time out + i <- 1 + repeat { + bucket_ok <- tryCatch( + s3saveRDS(x, bucket = base, object = target, multipart = TRUE), + error = function(err) { + err + } + ) + if (is.logical(bucket_ok)) { + break + } + stopifnot("Unable to write to S3 bucket" = i <= 10) + message("attempt ", i, " to write to S3 bucket failed. 
Trying again...") + i <- i + 1 + # waiting time between tries increases with the number of tries + Sys.sleep(i) + } + } + rm(x) + gc(verbose = FALSE) + return(result) } ) diff --git a/man/get_result.Rd b/man/get_result.Rd index 37eea179..83891dbb 100644 --- a/man/get_result.Rd +++ b/man/get_result.Rd @@ -5,38 +5,38 @@ \docType{methods} \name{get_result} \alias{get_result} -\alias{get_result,character-method} -\alias{get_result,n2kInla-method} -\alias{get_result,n2kModel-method} -\alias{get_result,n2kManifest-method} -\alias{get_result,s3_object-method} +\alias{get_result,character,character-method} +\alias{get_result,character,s3_bucket-method} +\alias{get_result,n2kInla,ANY-method} +\alias{get_result,n2kModel,ANY-method} +\alias{get_result,n2kManifest,ANY-method} +\alias{get_result,s3_object,ANY-method} \title{Add the results from an analysis} \usage{ -get_result(x, ...) +get_result(x, base, ...) -\S4method{get_result}{character}(x, n_cluster = 1, verbose = TRUE, ...) +\S4method{get_result}{character,character}(x, base, ..., project, verbose = TRUE) -\S4method{get_result}{n2kInla}(x, verbose = TRUE, ...) +\S4method{get_result}{character,s3_bucket}(x, base, ..., project, verbose = TRUE) -\S4method{get_result}{n2kModel}(x, verbose = TRUE, ...) +\S4method{get_result}{n2kInla,ANY}(x, base, ..., verbose = TRUE) -\S4method{get_result}{n2kManifest}(x, ..., base, project, verbose = TRUE) +\S4method{get_result}{n2kModel,ANY}(x, verbose = TRUE, ...) -\S4method{get_result}{s3_object}(x, ...) +\S4method{get_result}{n2kManifest,ANY}(x, base, ..., verbose = TRUE) + +\S4method{get_result}{s3_object,ANY}(x, base, ..., project, verbose = TRUE) } \arguments{ \item{x}{object with the current results} +\item{base}{the base location to read the model} + \item{...}{further arguments (see Details)} -\item{n_cluster}{The number of clusters to run this function in parallel. 
-Defaults to \code{1} (= no parallel computing).} +\item{project}{will be a relative path within the base location} \item{verbose}{Print extra information on the screen} - -\item{base}{the base location to read the model} - -\item{project}{will be a relative path within the base location} } \description{ Add the results from an analysis From bd47ee18c1e7901d4dffdca021348743e702b455 Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Tue, 4 Feb 2025 10:09:45 +0100 Subject: [PATCH 02/16] =?UTF-8?q?=E2=9C=85=20Update=20unit=20tests=20to=20?= =?UTF-8?q?changes=20in=20get=5Fresult()?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/testthat/test_cba_fit_model_manifest.R | 69 ++++++++++++++------ tests/testthat/test_eaa_get_result.R | 53 +++++++++++---- 2 files changed, 90 insertions(+), 32 deletions(-) diff --git a/tests/testthat/test_cba_fit_model_manifest.R b/tests/testthat/test_cba_fit_model_manifest.R index 2f0f0ec8..28c44cde 100644 --- a/tests/testthat/test_cba_fit_model_manifest.R +++ b/tests/testthat/test_cba_fit_model_manifest.R @@ -29,7 +29,7 @@ test_that("it handles a manifest", { store_model(object, base = base, project = project) store_model(object2, base = base, project = project) store_model(object3, base = base, project = project) - x <- data.frame( + manif <- data.frame( fingerprint = c( get_file_fingerprint(object), get_file_fingerprint(object2), get_file_fingerprint(object3) @@ -41,22 +41,46 @@ test_that("it handles a manifest", { ) |> n2k_manifest() hash <- store_manifest_yaml( - x = x, base = base, project = project, docker = "inbobmk/rn2k:dev-0.10", + x = manif, base = base, project = project, docker = "inbobmk/rn2k:dev-0.10", dependencies = c("inbo/n2khelper@v0.5.0", "inbo/n2kanalysis@0.4.0") ) script <- manifest_yaml_to_bash( base = base, project = project, hash = basename(hash) ) - expect_invisible(fit_model(x, base = base, project = project)) - y <- store_manifest(x, base, project) + results <- get_result( + x = manif, base = base, project = project, verbose = FALSE + ) + expect_s4_class(results, "n2kResult") + expect_identical( + sort(results@AnalysisMetadata$file_fingerprint), + sort(manif@Manifest$fingerprint) + ) + expect_true(all(status(results) == "new")) + expect_invisible( + fit_model(manif, base = base, project = project, verbose = FALSE) + ) + y <- store_manifest(manif, base, project) expect_null(fit_model(y, base = base, project = project)) expect_null(fit_model(y)) + results <- get_result( + x = manif, base = base, project = project, verbose = FALSE + ) + expect_s4_class(results, "n2kResult") + expect_identical( + sort(results@AnalysisMetadata$file_fingerprint), + sort(manif@Manifest$fingerprint) + ) + expect_true(all(status(results) == "converged")) + expect_s4_class( + results <- get_result(x = manif, base = base, project = project), + "n2kResult" + ) file.path(base, project) |> list.files(recursive = TRUE, full.names = TRUE) |> c( R_user_dir("n2kanalysis", which = "cache") |> - file.path(x@Fingerprint) + file.path(manif@Fingerprint) ) |> file.remove() @@ -67,7 +91,7 @@ test_that("it handles a manifest", { store_model(object, base = aws_base, project = project) store_model(object2, base = aws_base, project = project) store_model(object3, base = aws_base, project = project) - x <- data.frame( + manif <- data.frame( fingerprint = c( get_file_fingerprint(object), get_file_fingerprint(object2), get_file_fingerprint(object3) @@ -79,35 +103,42 @@ test_that("it handles a manifest", { ) |> n2k_manifest() hash 
<- store_manifest_yaml( - x = x, base = aws_base, project = project, docker = "inbobmk/rn2k:dev-0.10", + x = manif, base = aws_base, project = project, + docker = "inbobmk/rn2k:dev-0.10", dependencies = c("inbo/n2khelper@v0.5.0", "inbo/n2kanalysis@0.4.0") ) script <- manifest_yaml_to_bash( base = aws_base, project = project, hash = basename(hash) ) - expect_s3_class( - results <- get_result(x, base = aws_base, project = project), - "data.frame" + results <- get_result( + x = manif, base = aws_base, project = project, verbose = FALSE + ) + expect_s4_class(results, "n2kResult") + expect_identical( + sort(results@AnalysisMetadata$file_fingerprint), + sort(manif@Manifest$fingerprint) ) - expect_true(all(results$status == "new")) + expect_true(all(status(results) == "new")) expect_invisible( - fit_model(x, base = aws_base, project = project, verbose = TRUE) + fit_model(manif, base = aws_base, project = project, verbose = FALSE) ) - expect_s3_class( - results <- get_result(x, base = aws_base, project = project), - "data.frame" + results <- get_result(x = manif, base = aws_base, project = project) + expect_s4_class(results, "n2kResult") + expect_identical( + sort(results@AnalysisMetadata$file_fingerprint), + sort(manif@Manifest$fingerprint) ) - expect_true(all(results$status == "converged")) + expect_true(all(status(results) == "converged")) - y <- store_manifest(x, base = aws_base, project = project) + y <- store_manifest(manif, base = aws_base, project = project) expect_invisible(fit_model(y$Contents)) expect_null(fit_model(y$Contents$Key, base = aws_base, project = project)) - available <- get_bucket(aws_base, prefix = project) %>% + available <- get_bucket(aws_base, prefix = project) |> sapply("[[", "Key") expect_true(all(sapply(available, delete_object, bucket = aws_base))) R_user_dir("n2kanalysis", which = "cache") |> - file.path(x@Fingerprint) |> + file.path(manif@Fingerprint) |> file.remove() }) diff --git a/tests/testthat/test_eaa_get_result.R b/tests/testthat/test_eaa_get_result.R index 752c4c0c..b5989092 100644 --- a/tests/testthat/test_eaa_get_result.R +++ b/tests/testthat/test_eaa_get_result.R @@ -1,9 +1,4 @@ test_that("get_result on n2kInla", { - expect_error( - get_result("junk"), - "'x' is neither an existing file, neither an existing directory" - ) - temp_dir <- tempfile("get_result_n2kInla") dir.create(temp_dir) this_result_datasource_id <- sha1(sample(letters)) @@ -43,7 +38,11 @@ test_that("get_result on n2kInla", { filename <- store_model(analysis, base = temp_dir, project = "get_result") expect_equal( - get_result(filename, datasource_id = this_datasource, verbose = FALSE), + basename(filename) |> + get_result( + base = temp_dir, project = "get_result", + datasource_id = this_datasource, verbose = FALSE + ), result ) fit_model(filename, verbose = FALSE) @@ -55,7 +54,11 @@ test_that("get_result on n2kInla", { expect_identical(nrow(result@Contrast), 0L) expect_lt(0, nrow(result@Anomaly)) expect_equal( - get_result(filename, datasource_id = this_datasource, verbose = FALSE), + basename(filename) |> + get_result( + base = temp_dir, project = "get_result", + datasource_id = this_datasource, verbose = FALSE + ), result ) @@ -89,7 +92,11 @@ test_that("get_result on n2kInla", { filename <- store_model(analysis, base = temp_dir, project = "get_result") expect_equal( - get_result(filename, datasource_id = this_datasource, verbose = FALSE), + basename(filename) |> + get_result( + base = temp_dir, project = "get_result", + datasource_id = this_datasource, verbose = FALSE + ), result2 ) 
fit_model(filename, verbose = FALSE) @@ -103,7 +110,11 @@ test_that("get_result on n2kInla", { expect_lt(0, nrow(result2@ContrastEstimate)) expect_lt(0, nrow(result2@Anomaly)) expect_equal( - get_result(filename, datasource_id = this_datasource, verbose = FALSE), + basename(filename) |> + get_result( + base = temp_dir, project = "get_result", + datasource_id = this_datasource, verbose = FALSE + ), result2 ) @@ -131,7 +142,11 @@ test_that("get_result on n2kInla", { expect_identical(nrow(result3@Anomaly), 0L) filename <- store_model(analysis, base = temp_dir, project = "get_result") expect_equal( - get_result(filename, datasource_id = this_datasource, verbose = FALSE), + basename(filename) |> + get_result( + base = temp_dir, project = "get_result", + datasource_id = this_datasource, verbose = FALSE + ), result3 ) fit_model(filename, verbose = FALSE) @@ -146,7 +161,11 @@ test_that("get_result on n2kInla", { expect_lt(0, nrow(result3@ContrastEstimate)) expect_lt(0, nrow(result3@Anomaly)) expect_equal( - get_result(filename, datasource_id = this_datasource, verbose = FALSE), + basename(filename) |> + get_result( + base = temp_dir, project = "get_result", + datasource_id = this_datasource, verbose = FALSE + ), result3 ) @@ -192,7 +211,11 @@ test_that("get_result on n2kInla", { expect_identical(nrow(result4@Anomaly), 0L) filename <- store_model(analysis, base = temp_dir, project = "get_result") expect_equal( - get_result(filename, datasource_id = this_datasource, verbose = FALSE), + basename(filename) |> + get_result( + base = temp_dir, project = "get_result", + datasource_id = this_datasource, verbose = FALSE + ), result4 ) fit_model(filename, verbose = FALSE) @@ -206,7 +229,11 @@ test_that("get_result on n2kInla", { expect_lt(0, nrow(result4@ContrastEstimate)) expect_lt(0, nrow(result4@Anomaly)) expect_equal( - get_result(filename, datasource_id = this_datasource, verbose = FALSE), + basename(filename) |> + get_result( + base = temp_dir, project = "get_result", + datasource_id = this_datasource, verbose = FALSE + ), result4 ) From 943f6501cef8a77843911a9534c78ac8bf063f28 Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Tue, 4 Feb 2025 10:12:18 +0100 Subject: [PATCH 03/16] =?UTF-8?q?=F0=9F=94=96=20Bump=20package=20version?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .zenodo.json | 2 +- CITATION.cff | 2 +- DESCRIPTION | 2 +- NEWS.md | 4 ++++ inst/CITATION | 4 ++-- 5 files changed, 9 insertions(+), 5 deletions(-) diff --git a/.zenodo.json b/.zenodo.json index 1d1a09aa..1011cc8f 100644 --- a/.zenodo.json +++ b/.zenodo.json @@ -1,6 +1,6 @@ { "title": "n2kanalysis: Generic Functions to Analyse Data from the 'Natura 2000' Monitoring", - "version": "0.4.0", + "version": "0.4.1", "license": "GPL-3.0", "upload_type": "software", "description": "
<p>
All generic functions and classes for the analysis for the ‘Natura 2000’ monitoring. The classes contain all required data and definitions to fit the model without the need to access other sources. Potentially they might need access to one or more parent objects. An aggregation object might for example need the result of an imputation object. The actual definition of the analysis, using these generic function and classes, is defined in dedictated analysis R packages for every monitoring scheme. For example ‘abvanalysis’ and ‘watervogelanalysis’.<\/p>", diff --git a/CITATION.cff b/CITATION.cff index 6f445561..94a2845e 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -29,4 +29,4 @@ identifiers: value: 10.5281/zenodo.3576047 - type: url value: https://inbo.github.io/n2kanalysis/ -version: 0.4.0 +version: 0.4.1 diff --git a/DESCRIPTION b/DESCRIPTION index f5d110de..7237f9ab 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: n2kanalysis Title: Generic Functions to Analyse Data from the 'Natura 2000' Monitoring -Version: 0.4.0 +Version: 0.4.1 Authors@R: c( person("Thierry", "Onkelinx", , "thierry.onkelinx@inbo.be", role = c("aut", "cre"), comment = c(ORCID = "0000-0001-8804-4216", affiliation = "Research Institute for Nature and Forest (INBO)")), diff --git a/NEWS.md b/NEWS.md index e139687a..0e32bc1e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +# `n2kanalysis` 0.4.1 + +* Improve `get_result()` + # `n2kanalysis` 0.4.0 * Handle INLA models with an SPDE element. diff --git a/inst/CITATION b/inst/CITATION index 8146bc88..646e7e4a 100644 --- a/inst/CITATION +++ b/inst/CITATION @@ -2,12 +2,12 @@ citHeader("To cite `n2kanalysis` in publications please use:") # begin checklist entry bibentry( bibtype = "Manual", - title = "n2kanalysis: Generic Functions to Analyse Data from the 'Natura 2000' Monitoring. Version 0.4.0", + title = "n2kanalysis: Generic Functions to Analyse Data from the 'Natura 2000' Monitoring. Version 0.4.1", author = c( author = c(person(given = "Thierry", family = "Onkelinx"))), year = 2025, url = "https://inbo.github.io/n2kanalysis/", abstract = "All generic functions and classes for the analysis for the 'Natura 2000' monitoring. The classes contain all required data and definitions to fit the model without the need to access other sources. Potentially they might need access to one or more parent objects. An aggregation object might for example need the result of an imputation object. The actual definition of the analysis, using these generic function and classes, is defined in dedictated analysis R packages for every monitoring scheme. For example 'abvanalysis' and 'watervogelanalysis'.", - textVersion = "Onkelinx, Thierry (2025) n2kanalysis: Generic Functions to Analyse Data from the 'Natura 2000' Monitoring. Version 0.4.0. https://inbo.github.io/n2kanalysis/", + textVersion = "Onkelinx, Thierry (2025) n2kanalysis: Generic Functions to Analyse Data from the 'Natura 2000' Monitoring. Version 0.4.1. 
https://inbo.github.io/n2kanalysis/", keywords = "analysis, reproducible research, natura 2000, monitoring", doi = "10.5281/zenodo.3576047", ) From 279bfbbd0cd153547dc0863d9d7a3eb9f5d6c1d4 Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Wed, 5 Feb 2025 12:40:27 +0100 Subject: [PATCH 04/16] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20Add=20write=5Fs3=5Ff?= =?UTF-8?q?un()?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DESCRIPTION | 1 + R/store_manifest.R | 65 ++++--------------- R/store_manifest_yaml.R | 45 ++++--------- R/write_s3_fun.R | 41 ++++++++++++ tests/testthat/test_baa_store_manifest.R | 25 ++++--- tests/testthat/test_bba_read_manifest.R | 4 +- tests/testthat/test_bbb_store_manifest_yaml.R | 2 +- tests/testthat/test_cba_fit_model_manifest.R | 16 +++-- 8 files changed, 87 insertions(+), 112 deletions(-) create mode 100644 R/write_s3_fun.R diff --git a/DESCRIPTION b/DESCRIPTION index 7237f9ab..5ec3bf19 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -169,3 +169,4 @@ Collate: 'store_model.R' 'union.R' 'valid_object.R' + 'write_s3_fun.R' diff --git a/R/store_manifest.R b/R/store_manifest.R index 528108ab..9dbe21b9 100644 --- a/R/store_manifest.R +++ b/R/store_manifest.R @@ -29,18 +29,14 @@ setMethod( validObject(x, complete = TRUE) #create dir is it doesn't exist - dir <- file.path(base, project, "manifest") %>% + dir <- file.path(base, project, "manifest") |> normalizePath(winslash = "/", mustWork = FALSE) - if (!dir.exists(dir)) { - dir.create(dir, recursive = TRUE) - } + dir.create(dir, recursive = TRUE, showWarnings = FALSE) #test if file exists fingerprint <- get_file_fingerprint(x) filename <- list.files( - dir, - pattern = sprintf("%s.manifest$", fingerprint), - full.names = TRUE + dir, pattern = sprintf("%s.manifest$", fingerprint), full.names = TRUE ) if (length(filename) > 0) { return(normalizePath(filename, winslash = "/")) @@ -53,62 +49,23 @@ setMethod( #' @rdname store_manifest #' @importFrom methods setMethod new -#' @importFrom assertthat assert_that is.string -#' @importFrom aws.s3 bucket_exists get_bucket s3write_using +#' @importFrom assertthat assert_that is.string noNA #' @importFrom utils write.table #' @include import_s3_classes.R setMethod( f = "store_manifest", signature = signature(base = "s3_bucket"), definition = function(x, base, project) { - assert_that(inherits(x, "n2kManifest")) - assert_that(is.string(project)) + assert_that(inherits(x, "n2kManifest"), is.string(project), noNA(project)) validObject(x, complete = TRUE) filename <- file.path( - project, "manifest", sprintf( - "%s.manifest", - get_file_fingerprint(x) - ), fsep = "/" + fsep = "/", project, "manifest", + sprintf("%s.manifest", get_file_fingerprint(x)) + ) + write_s3_fun( + object = x@Manifest, bucket = base, key = filename, overwrite = FALSE, + row.names = FALSE, sep = "\t" ) - # check if object with same fingerprint exists - existing <- get_bucket(base, prefix = filename) - if (length(existing) > 0) { - return(existing) - } - - # create object if it doesn't exists - # try several times to write to S3 bucket - # avoids errors due to time out - i <- 1 - repeat { - bucket_ok <- tryCatch( - s3write_using( - x@Manifest, - write.table, - row.names = FALSE, - sep = "\t", - bucket = base, - object = filename - ), - error = function(err) { - err - } - ) - if (is.logical(bucket_ok)) { - break - } - if (i > 10) { - stop("Unable to write to S3 bucket") - } - message("attempt ", i, " to write to S3 bucket failed. 
Trying again...") - i <- i + 1 - # waiting time between tries increases with the number of tries - Sys.sleep(i) - } - if (!bucket_ok) { - stop("Unable to write to S3 bucket") - } - get_bucket(base, prefix = filename) } ) diff --git a/R/store_manifest_yaml.R b/R/store_manifest_yaml.R index 033156ce..f416d1be 100644 --- a/R/store_manifest_yaml.R +++ b/R/store_manifest_yaml.R @@ -17,52 +17,31 @@ setGeneric( #' @export #' @rdname store_manifest_yaml #' @importFrom methods setMethod new -#' @importFrom assertthat assert_that is.string -#' @importFrom dplyr %>% +#' @importFrom assertthat assert_that is.string noNA #' @importFrom purrr map_chr #' @importFrom yaml write_yaml setMethod( f = "store_manifest_yaml", signature = signature(base = "s3_bucket"), definition = function(x, base, project, docker, dependencies) { - assert_that(is.string(docker)) - assert_that(is.character(dependencies)) + assert_that( + is.string(docker), is.character(dependencies), noNA(dependencies), + noNA(docker) + ) stored <- store_manifest(x = x, base = base, project = project) list( github = dependencies, docker = docker, bucket = attr(base, "Name"), project = project, - hash = basename(stored$Contents$Key) |> + hash = basename(stored) |> gsub(pattern = "\\.manifest", replacement = "") ) -> yaml filename <- sprintf("%s/yaml/%s.yaml", project, sha1(yaml)) - available <- get_bucket(base, prefix = filename, max = Inf) - if (length(available)) { - return(map_chr(available, "Key")) - } - # try several times to write to S3 bucket - # avoids errors due to time out - i <- 1 - repeat { - bucket_ok <- tryCatch( - s3write_using(yaml, write_yaml, bucket = base, object = filename), - error = function(err) { - err - } - ) - if (is.logical(bucket_ok)) { - break - } - stopifnot("Unable to write to S3 bucket" = i <= 10) - message("attempt ", i, " to write to S3 bucket failed. Trying again...") - i <- i + 1 - # waiting time between tries increases with the number of tries - Sys.sleep(i) - } - stopifnot("Unable to write to S3 bucket" = bucket_ok) - get_bucket(base, prefix = filename, max = Inf) |> - map_chr("Key") + write_s3_fun( + object = yaml, bucket = base, key = filename, overwrite = FALSE, + fun = write_yaml + ) } ) @@ -89,9 +68,7 @@ setMethod( if (file.exists(filename)) { return(filename) } - if (!dir.exists(dirname(filename))) { - dir.create(dirname(filename), recursive = TRUE) - } + dir.create(dirname(filename), recursive = TRUE, showWarnings = FALSE) write_yaml(yaml, filename) return(filename) } diff --git a/R/write_s3_fun.R b/R/write_s3_fun.R new file mode 100644 index 00000000..ccf5c29d --- /dev/null +++ b/R/write_s3_fun.R @@ -0,0 +1,41 @@ +#' @importFrom assertthat assert_that is.count is.flag noNA +#' @importFrom aws.s3 bucket_exists get_bucket s3write_using +#' @importFrom purrr map_chr +write_s3_fun <- function( + object, bucket, key, fun = write.table, overwrite = FALSE, opts = NULL, ..., + max_attempt = 10 +) { + assert_that(is.flag(overwrite), noNA(overwrite), is.count(max_attempt)) + # check if object with same fingerprint exists in case we don't overwrite + existing <- get_bucket(bucket, prefix = key) + if (!overwrite && length(existing) > 0) { + return(unname(map_chr(existing, "Key"))) + } + + # create object if it doesn't exists or we want to overwrite + # try several times to write to S3 bucket + # avoids errors due to time out + i <- 1 + repeat { + bucket_ok <- tryCatch( + s3write_using( + x = object, FUN = fun, bucket = bucket, object = key, opts = opts, ... 
+ ), + error = function(err) { + err + } + ) + if (is.logical(bucket_ok)) { + break + } + stopifnot("Unable to write to S3 bucket" = i <= max_attempt) + message("attempt ", i, " to write to S3 bucket failed. Trying again...") + i <- i + 1 + # waiting time between tries increases with the number of tries + Sys.sleep(i) + } + stopifnot("Unable to write to S3 bucket" = bucket_ok) + get_bucket(bucket, prefix = key) |> + map_chr("Key") |> + unname() +} diff --git a/tests/testthat/test_baa_store_manifest.R b/tests/testthat/test_baa_store_manifest.R index b0722371..db6f4f00 100644 --- a/tests/testthat/test_baa_store_manifest.R +++ b/tests/testthat/test_baa_store_manifest.R @@ -27,28 +27,25 @@ test_that("store_manifest stores the manifest on a local file system", { test_that("store_manifest stores the manifest on an S3 bucket", { skip_if(Sys.getenv("AWS_SECRET_ACCESS_KEY") == "", message = "No AWS access") - bucket <- get_bucket(Sys.getenv("N2KBUCKET")) + project <- "unittest_store_manifest" + bucket <- get_bucket(Sys.getenv("N2KBUCKET"), prefix = project, max = 1) object <- n2k_manifest( data.frame( fingerprint = "1", parent = NA_character_, stringsAsFactors = FALSE ) ) - expect_is( - stored <- store_manifest( - x = object, base = bucket, project = "unittest_store_manifest" - ), - "s3_bucket" + expect_type( + stored <- store_manifest(x = object, base = bucket, project = project), + "character" ) - available <- get_bucket(bucket, prefix = "unittest_store_manifest") - expect_equivalent(stored, available) - expect_is( - stored2 <- store_manifest( - x = object, base = bucket, project = "unittest_store_manifest" - ), - "s3_bucket" + available <- get_bucket(bucket, prefix = project) + expect_equivalent(stored, map_chr(available, "Key")) + expect_type( + stored2 <- store_manifest(x = object, base = bucket, project = project), + "character" ) available <- get_bucket(bucket, prefix = "unittest_store_manifest") - expect_equivalent(stored2, available) + expect_equivalent(stored2, map_chr(available, "Key")) expect_equivalent(stored, stored2) expect_true(all(sapply(available, delete_object, bucket = bucket))) }) diff --git a/tests/testthat/test_bba_read_manifest.R b/tests/testthat/test_bba_read_manifest.R index 4225295d..f7c3290a 100644 --- a/tests/testthat/test_bba_read_manifest.R +++ b/tests/testthat/test_bba_read_manifest.R @@ -41,8 +41,8 @@ test_that("read_manifest reads the manifest on a local file system", { test_that("read_manifest reads the manifest on an S3 bucket", { skip_if(Sys.getenv("AWS_SECRET_ACCESS_KEY") == "", message = "No AWS access") - bucket <- get_bucket(Sys.getenv("N2KBUCKET")) project <- "unittest_read_manifest" + bucket <- get_bucket(Sys.getenv("N2KBUCKET"), prefix = project, max = 1) object <- n2k_manifest( data.frame( fingerprint = "1", parent = NA_character_, stringsAsFactors = FALSE @@ -60,7 +60,7 @@ test_that("read_manifest reads the manifest on an S3 bucket", { ) Sys.sleep(2) stored <- store_manifest(object2, bucket, project) - expect_equal(read_manifest(bucket, hash = stored$Contents$Key), object2) + expect_equal(read_manifest(bucket, hash = stored), object2) expect_equal(read_manifest(bucket, project, object2@Fingerprint), object2) latest <- read_manifest(bucket, project) expect_equal(latest, object2) diff --git a/tests/testthat/test_bbb_store_manifest_yaml.R b/tests/testthat/test_bbb_store_manifest_yaml.R index f8fd19ba..ab807b5e 100644 --- a/tests/testthat/test_bbb_store_manifest_yaml.R +++ b/tests/testthat/test_bbb_store_manifest_yaml.R @@ -1,13 +1,13 @@ 
context("store_manifest_yaml") test_that("store_manifest_yaml stores the manifest on an S3 bucket", { skip_if(Sys.getenv("AWS_SECRET_ACCESS_KEY") == "", message = "No AWS access") - bucket <- get_bucket(Sys.getenv("N2KBUCKET")) object <- n2k_manifest( data.frame( fingerprint = "1", parent = NA_character_, stringsAsFactors = FALSE ) ) project <- "unittest_store_manifest_yaml" + bucket <- get_bucket(Sys.getenv("N2KBUCKET"), prefix = project, max = 1) docker <- "inbobmk/rn2k:latest" dependencies <- c("inbo/n2khelper@v0.4.1", "inbo/n2kanalysis@docker") expect_is( diff --git a/tests/testthat/test_cba_fit_model_manifest.R b/tests/testthat/test_cba_fit_model_manifest.R index 28c44cde..fd3d2fd1 100644 --- a/tests/testthat/test_cba_fit_model_manifest.R +++ b/tests/testthat/test_cba_fit_model_manifest.R @@ -60,8 +60,8 @@ test_that("it handles a manifest", { fit_model(manif, base = base, project = project, verbose = FALSE) ) y <- store_manifest(manif, base, project) - expect_null(fit_model(y, base = base, project = project)) - expect_null(fit_model(y)) + expect_null(fit_model(y, base = base, project = project, verbose = FALSE)) + expect_null(fit_model(y, verbose = FALSE)) results <- get_result( x = manif, base = base, project = project, verbose = FALSE ) @@ -72,7 +72,9 @@ test_that("it handles a manifest", { ) expect_true(all(status(results) == "converged")) expect_s4_class( - results <- get_result(x = manif, base = base, project = project), + results <- get_result( + x = manif, base = base, project = project, verbose = FALSE + ), "n2kResult" ) @@ -122,7 +124,9 @@ test_that("it handles a manifest", { expect_invisible( fit_model(manif, base = aws_base, project = project, verbose = FALSE) ) - results <- get_result(x = manif, base = aws_base, project = project) + results <- get_result( + x = manif, base = aws_base, project = project, verbose = FALSE + ) expect_s4_class(results, "n2kResult") expect_identical( sort(results@AnalysisMetadata$file_fingerprint), @@ -131,9 +135,7 @@ test_that("it handles a manifest", { expect_true(all(status(results) == "converged")) y <- store_manifest(manif, base = aws_base, project = project) - expect_invisible(fit_model(y$Contents)) - - expect_null(fit_model(y$Contents$Key, base = aws_base, project = project)) + expect_null(fit_model(y, base = aws_base, project = project, verbose = FALSE)) available <- get_bucket(aws_base, prefix = project) |> sapply("[[", "Key") From 92083787d2e12d9775869bc0a1553303ba53ff60 Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Wed, 5 Feb 2025 16:29:47 +0100 Subject: [PATCH 05/16] =?UTF-8?q?=F0=9F=A5=85=20Read=5Fmanifest()=20valida?= =?UTF-8?q?tes=20fingerprint?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- R/read_manifest.R | 91 +++++++++++++------------ tests/testthat/test_bba_read_manifest.R | 15 ++-- 2 files changed, 53 insertions(+), 53 deletions(-) diff --git a/R/read_manifest.R b/R/read_manifest.R index ee961644..1d1b9e3b 100644 --- a/R/read_manifest.R +++ b/R/read_manifest.R @@ -27,9 +27,7 @@ setMethod( f = "read_manifest", signature = signature(base = "character"), definition = function(base, project, hash) { - assert_that(is.string(base)) - assert_that(file_test("-d", base)) - assert_that(is.string(project)) + assert_that(is.string(base), file_test("-d", base), is.string(project)) #check dir if exists dir <- file.path(base, project, "manifest") %>% @@ -40,48 +38,46 @@ setMethod( ) available <- list.files( - dir, - pattern = "\\.manifest$", - full.names = TRUE, - ignore.case = TRUE 
+ dir, pattern = "\\.manifest$", full.names = TRUE, ignore.case = TRUE + ) + assert_that( + length(available) > 0, msg = paste0("No manifest files in '", dir, "'") ) - if (length(available) == 0) { - stop("No manifest files in '", dir, "'") - } if (missing(hash)) { - manifest <- file.info(available) %>% - rownames_to_column("filename") %>% - arrange(desc(.data$mtime)) %>% - slice(1) %>% - "[["("filename") %>% #nolint - read.table( - header = TRUE, - sep = "\t", - colClasses = "character", - as.is = TRUE - ) %>% - n2k_manifest() + file.info(available) |> + rownames_to_column("filename") |> + slice_max(.data$mtime, n = 1) -> latest + read.table( + latest$filename, header = TRUE, sep = "\t", colClasses = "character", + as.is = TRUE + ) |> + n2k_manifest() -> manifest + stopifnot( + "fingerprint doesn't match" = + paste0(manifest@Fingerprint, ".manifest") == basename(latest$filename) + ) return(manifest) } assert_that(is.string(hash)) selection <- grep(sprintf("manifest/%s.*\\.manifest$", hash), available) - if (length(selection) == 0) { - stop("No manifest found starting with '", hash, "'") - } - if (length(selection) > 1) { - stop("Multiple manifests found starting with '", hash, "'") - } - available[selection] %>% - read.table( - header = TRUE, - sep = "\t", - colClasses = "character", - as.is = TRUE - ) %>% - n2k_manifest() + assert_that( + length(selection) > 0, + msg = paste0("No manifest found starting with '", hash, "'") + ) + assert_that( + length(selection) == 1, + msg = paste0("Multiple manifests found starting with '", hash, "'") + ) + read.table( + available[selection], header = TRUE, sep = "\t", colClasses = "character", + as.is = TRUE + ) |> + n2k_manifest() -> manifest + stopifnot("fingerprint doesn't match" = manifest@Fingerprint == hash) + return(manifest) } ) @@ -96,7 +92,6 @@ setMethod( f = "read_manifest", signature = signature(base = "s3_bucket"), definition = function(base, project, hash) { - if (missing(hash)) { assert_that(is.string(project)) available <- get_bucket( @@ -104,24 +99,30 @@ setMethod( ) stopifnot("No manifest files in this project" = length(available) > 0) map_chr(available, "LastModified") |> - as.POSIXct() |> + as.POSIXct(format = "%Y-%m-%dT%H:%M:%OS") |> which.max() -> latest s3read_using( read.table, header = TRUE, sep = "\t", colClasses = "character", as.is = TRUE, object = available[[latest]] ) |> n2k_manifest() -> manifest + stopifnot( + "fingerprint doesn't match" = + paste0(manifest@Fingerprint, ".manifest") == + basename(available[[latest]][["Key"]]) + ) return(manifest) } assert_that(is.string(hash)) if (missing(project)) { - available <- get_bucket(bucket = base, prefix = hash) - } else { - available <- get_bucket( - bucket = base, prefix = paste(project, "manifest", hash, sep = "/") - ) + project <- gsub("(.*?)/manifest/.*", "\\1", hash) + gsub(".*?/manifest/(.*)", "\\1", hash) |> + gsub(pattern = "\\.manifest", replacement = "", x = _) -> hash } + available <- get_bucket( + bucket = base, prefix = paste(project, "manifest", hash, sep = "/") + ) assert_that( length(available) > 0, msg = sprintf("No manifest found starting with '%s'", hash) @@ -134,6 +135,8 @@ setMethod( read.table, header = TRUE, sep = "\t", colClasses = "character", as.is = TRUE, object = available[[1]] ) |> - n2k_manifest() + n2k_manifest() -> manifest + stopifnot("fingerprint doesn't match" = manifest@Fingerprint == hash) + return(manifest) } ) diff --git a/tests/testthat/test_bba_read_manifest.R b/tests/testthat/test_bba_read_manifest.R index f7c3290a..6b2c9259 100644 --- 
a/tests/testthat/test_bba_read_manifest.R +++ b/tests/testthat/test_bba_read_manifest.R @@ -14,8 +14,8 @@ test_that("read_manifest reads the manifest on a local file system", { expect_error( read_manifest(temp_dir, "read_manifest"), "No manifest files in" ) - file.path(temp_dir, "read_manifest", "manifest") %>% - normalizePath(mustWork = FALSE) %>% + file.path(temp_dir, "read_manifest", "manifest") |> + normalizePath(mustWork = FALSE) |> dir.create(recursive = TRUE) expect_error( read_manifest(temp_dir, "read_manifest"), "No manifest files in" @@ -34,8 +34,8 @@ test_that("read_manifest reads the manifest on a local file system", { read_manifest(temp_dir, "read_manifest", "junk"), "No manifest found starting with 'junk'" ) - file.path(temp_dir, "read_manifest") %>% - list.files(recursive = TRUE, full.names = TRUE) %>% + file.path(temp_dir, "read_manifest") |> + list.files(recursive = TRUE, full.names = TRUE) |> file.remove() }) @@ -54,10 +54,7 @@ test_that("read_manifest reads the manifest on an S3 bucket", { ) ) store_manifest(object, bucket, project) - expect_equal( - read_manifest(bucket, project, object@Fingerprint), - object - ) + expect_equal(read_manifest(bucket, project, object@Fingerprint), object) Sys.sleep(2) stored <- store_manifest(object2, bucket, project) expect_equal(read_manifest(bucket, hash = stored), object2) @@ -70,7 +67,7 @@ test_that("read_manifest reads the manifest on an S3 bucket", { "No manifest found starting with 'junk'" ) - available <- get_bucket(Sys.getenv("N2KBUCKET"), prefix = project) %>% + available <- get_bucket(Sys.getenv("N2KBUCKET"), prefix = project) |> sapply("[[", "Key") expect_true(all(sapply(available, delete_object, bucket = bucket))) expect_error(read_manifest(bucket, project), "No manifest files in") From aa9b75a2199e79b984a71f1d365bae7b1acfeead Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Wed, 5 Feb 2025 17:13:08 +0100 Subject: [PATCH 06/16] =?UTF-8?q?=E2=9C=A8=20Allow=20to=20overwrite=20exis?= =?UTF-8?q?ting=20manifests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit store_manifest() and store_manifest_yaml() gain an overwrite argument. 
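
For example, a minimal sketch of the new argument (the bucket and project
names are placeholders; it assumes valid AWS credentials and the n2kanalysis
package loaded):

    manifest <- n2k_manifest(
      data.frame(fingerprint = "1", parent = NA_character_)
    )
    bucket <- aws.s3::get_bucket("my-n2k-bucket", max = 1)
    # the first call stores the manifest; the second replaces it in place
    # instead of returning the key of the existing object
    store_manifest(manifest, base = bucket, project = "demo")
    store_manifest(manifest, base = bucket, project = "demo", overwrite = TRUE)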
--- R/store_manifest.R | 24 ++++++++++++++---------- R/store_manifest_yaml.R | 30 +++++++++++++++++++++--------- man/store_manifest.Rd | 9 ++++++--- man/store_manifest_yaml.Rd | 9 ++++++--- 4 files changed, 47 insertions(+), 25 deletions(-) diff --git a/R/store_manifest.R b/R/store_manifest.R index 9dbe21b9..4693f0f1 100644 --- a/R/store_manifest.R +++ b/R/store_manifest.R @@ -2,6 +2,7 @@ #' @param x the `n2kManifest` #' @param base the base location to store the manifest #' @param project will be a relative path within the base location +#' @inheritParams store_model #' @name store_manifest #' @rdname store_manifest #' @exportMethod store_manifest @@ -9,7 +10,7 @@ #' @importFrom methods setGeneric setGeneric( name = "store_manifest", - def = function(x, base, project) { + def = function(x, base, project, overwrite = FALSE) { standardGeneric("store_manifest") # nocov } ) @@ -21,11 +22,11 @@ setGeneric( setMethod( f = "store_manifest", signature = signature(base = "character"), - definition = function(x, base, project) { - assert_that(inherits(x, "n2kManifest")) - assert_that(is.string(base)) - assert_that(file_test("-d", base)) - assert_that(is.string(project)) + definition = function(x, base, project, overwrite = FALSE) { + assert_that( + inherits(x, "n2kManifest"), is.string(base), file_test("-d", base), + is.string(project), is.flag(overwrite), noNA(overwrite) + ) validObject(x, complete = TRUE) #create dir is it doesn't exist @@ -38,7 +39,7 @@ setMethod( filename <- list.files( dir, pattern = sprintf("%s.manifest$", fingerprint), full.names = TRUE ) - if (length(filename) > 0) { + if (!overwrite && length(filename) > 0) { return(normalizePath(filename, winslash = "/")) } filename <- file.path(dir, sprintf("%s.manifest", fingerprint)) @@ -55,8 +56,11 @@ setMethod( setMethod( f = "store_manifest", signature = signature(base = "s3_bucket"), - definition = function(x, base, project) { - assert_that(inherits(x, "n2kManifest"), is.string(project), noNA(project)) + definition = function(x, base, project, overwrite = FALSE) { + assert_that( + inherits(x, "n2kManifest"), is.string(project), noNA(project), + is.flag(overwrite), noNA(overwrite) + ) validObject(x, complete = TRUE) filename <- file.path( @@ -64,7 +68,7 @@ setMethod( sprintf("%s.manifest", get_file_fingerprint(x)) ) write_s3_fun( - object = x@Manifest, bucket = base, key = filename, overwrite = FALSE, + object = x@Manifest, bucket = base, key = filename, overwrite = overwrite, row.names = FALSE, sep = "\t" ) } diff --git a/R/store_manifest_yaml.R b/R/store_manifest_yaml.R index f416d1be..afc16a16 100644 --- a/R/store_manifest_yaml.R +++ b/R/store_manifest_yaml.R @@ -1,5 +1,6 @@ #' Store a Docker configuration #' @inheritParams store_manifest +#' @inheritParams store_model #' @param docker the docker image to use #' @param dependencies extra GitHub packages to install #' @name store_manifest_yaml @@ -9,7 +10,7 @@ #' @importFrom methods setGeneric setGeneric( name = "store_manifest_yaml", - def = function(x, base, project, docker, dependencies) { + def = function(x, base, project, docker, dependencies, overwrite = FALSE) { standardGeneric("store_manifest_yaml") # nocov } ) @@ -23,13 +24,17 @@ setGeneric( setMethod( f = "store_manifest_yaml", signature = signature(base = "s3_bucket"), - definition = function(x, base, project, docker, dependencies) { + definition = function( + x, base, project, docker, dependencies, overwrite = FALSE + ) { assert_that( is.string(docker), is.character(dependencies), noNA(dependencies), - noNA(docker) + 
noNA(docker), is.flag(overwrite), noNA(overwrite) ) - stored <- store_manifest(x = x, base = base, project = project) + stored <- store_manifest( + x = x, base = base, project = project, overwrite = overwrite + ) list( github = dependencies, docker = docker, bucket = attr(base, "Name"), project = project, @@ -39,7 +44,7 @@ setMethod( filename <- sprintf("%s/yaml/%s.yaml", project, sha1(yaml)) write_s3_fun( - object = yaml, bucket = base, key = filename, overwrite = FALSE, + object = yaml, bucket = base, key = filename, overwrite = overwrite, fun = write_yaml ) } @@ -54,10 +59,17 @@ setMethod( setMethod( f = "store_manifest_yaml", signature = signature(base = "character"), - definition = function(x, base, project, docker, dependencies) { - assert_that(is.dir(base), is.string(docker), is.character(dependencies)) + definition = function( + x, base, project, docker, dependencies, overwrite = FALSE + ) { + assert_that( + is.dir(base), is.string(docker), is.character(dependencies), + is.flag(overwrite), noNA(overwrite) + ) - stored <- store_manifest(x = x, base = base, project = project) + stored <- store_manifest( + x = x, base = base, project = project, overwrite = overwrite + ) list( github = dependencies, docker = docker, bucket = base, project = project, hash = basename(stored) |> @@ -65,7 +77,7 @@ setMethod( ) -> yaml sprintf("%s/%s/yaml/%s.yaml", base, project, sha1(yaml)) |> normalizePath(winslash = "/", mustWork = FALSE) -> filename - if (file.exists(filename)) { + if (!overwrite && file.exists(filename)) { return(filename) } dir.create(dirname(filename), recursive = TRUE, showWarnings = FALSE) diff --git a/man/store_manifest.Rd b/man/store_manifest.Rd index 835b16b5..e65112ca 100644 --- a/man/store_manifest.Rd +++ b/man/store_manifest.Rd @@ -7,11 +7,11 @@ \alias{store_manifest,ANY,s3_bucket-method} \title{Store an \code{n2kManifest} object} \usage{ -store_manifest(x, base, project) +store_manifest(x, base, project, overwrite = FALSE) -\S4method{store_manifest}{ANY,character}(x, base, project) +\S4method{store_manifest}{ANY,character}(x, base, project, overwrite = FALSE) -\S4method{store_manifest}{ANY,s3_bucket}(x, base, project) +\S4method{store_manifest}{ANY,s3_bucket}(x, base, project, overwrite = FALSE) } \arguments{ \item{x}{the \code{n2kManifest}} @@ -19,6 +19,9 @@ store_manifest(x, base, project) \item{base}{the base location to store the manifest} \item{project}{will be a relative path within the base location} + +\item{overwrite}{Should an existing object be overwritten? 
+Defaults to \code{TRUE}.} } \description{ Store an \code{n2kManifest} object diff --git a/man/store_manifest_yaml.Rd b/man/store_manifest_yaml.Rd index 9bdffdfa..2f88c21c 100644 --- a/man/store_manifest_yaml.Rd +++ b/man/store_manifest_yaml.Rd @@ -7,11 +7,11 @@ \alias{store_manifest_yaml,ANY,character-method} \title{Store a Docker configuration} \usage{ -store_manifest_yaml(x, base, project, docker, dependencies) +store_manifest_yaml(x, base, project, docker, dependencies, overwrite = FALSE) -\S4method{store_manifest_yaml}{ANY,s3_bucket}(x, base, project, docker, dependencies) +\S4method{store_manifest_yaml}{ANY,s3_bucket}(x, base, project, docker, dependencies, overwrite = FALSE) -\S4method{store_manifest_yaml}{ANY,character}(x, base, project, docker, dependencies) +\S4method{store_manifest_yaml}{ANY,character}(x, base, project, docker, dependencies, overwrite = FALSE) } \arguments{ \item{x}{the \code{n2kManifest}} @@ -23,6 +23,9 @@ store_manifest_yaml(x, base, project, docker, dependencies) \item{docker}{the docker image to use} \item{dependencies}{extra GitHub packages to install} + +\item{overwrite}{Should an existing object be overwritten? +Defaults to \code{TRUE}.} } \description{ Store a Docker configuration From b634f1d4d18e87c78fdb06e406344c09a1ea9622 Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Thu, 6 Feb 2025 14:58:59 +0100 Subject: [PATCH 07/16] =?UTF-8?q?=F0=9F=90=9B=20N2k=5Fmanifest()=20get=20a?= =?UTF-8?q?=20stable=20fingerprint?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A different order of the attributes leads to a different fingerprint --- R/n2k_manifest.R | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/R/n2k_manifest.R b/R/n2k_manifest.R index 786cc3fb..30fd3493 100644 --- a/R/n2k_manifest.R +++ b/R/n2k_manifest.R @@ -35,12 +35,11 @@ setMethod( noNA(manifest$fingerprint) ) - if (inherits(manifest, "tbl")) { - manifest <- as.data.frame(manifest) - } manifest |> + as.data.frame() |> distinct(.data$fingerprint, .data$parent) |> - arrange(.data$fingerprint, .data$parent) -> manifest - new("n2kManifest", Manifest = manifest, Fingerprint = sha1(manifest)) + arrange(.data$fingerprint, .data$parent) -> manif + attributes(manif) <- attributes(manif)[c("names", "row.names", "class")] + new("n2kManifest", Manifest = manif, Fingerprint = sha1(manif)) } ) From d31902f677bdbc36edc3fc782f4d81f53851f6a3 Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Fri, 7 Mar 2025 13:15:55 +0100 Subject: [PATCH 08/16] =?UTF-8?q?=E2=9C=A8=20Get=5Fresult()=20gains=20a=20?= =?UTF-8?q?methode=20for=20n2kManifest?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- R/get_result_n2kmanifest.R | 57 +++++++++++++++++++++++++++++++++++++- R/get_result_s3_object.R | 2 +- man/get_result.Rd | 7 +++-- 3 files changed, 62 insertions(+), 4 deletions(-) diff --git a/R/get_result_n2kmanifest.R b/R/get_result_n2kmanifest.R index db10ddbc..183089c3 100644 --- a/R/get_result_n2kmanifest.R +++ b/R/get_result_n2kmanifest.R @@ -6,7 +6,7 @@ #' @include import_s3_classes.R setMethod( f = "get_result", - signature = signature(x = "n2kManifest"), + signature = signature(x = "n2kManifest", base = "character"), definition = function(x, base, ..., verbose = TRUE) { assert_that(validObject(x)) display(verbose = verbose, paste("Handle manifest", x@Fingerprint)) @@ -21,3 +21,58 @@ setMethod( do.call(what = combine) } ) + +#' @rdname get_result +#' @inheritParams read_model +#' @importFrom assertthat 
assert_that is.string noNA +#' @importFrom methods setMethod new +#' @importFrom purrr map_chr +#' @include import_s3_classes.R +setMethod( + f = "get_result", + signature = signature(x = "n2kManifest", base = "s3_bucket"), + definition = function(x, base, project, ..., verbose = TRUE) { + assert_that(validObject(x)) + display(verbose = verbose, paste("Handle manifest", x@Fingerprint)) + get_bucket( + bucket = base, prefix = file.path(project, "results"), max = Inf + ) |> + map_chr("Key") |> + basename() -> done + to_do <- order_manifest(manifest = x) + vapply( + to_do[!paste0(to_do, ".rds") %in% done], FUN.VALUE = logical(1), + FUN = function(x, base, verbose, project, ...) { + display(verbose = verbose, paste(" extracting", x)) + substring(x, 1, 4) |> + sprintf(fmt = "%2$s/%1$s", project) |> + get_bucket(bucket = base, max = Inf) -> available + available <- available[ + map_chr(available, "Key") |> + grepl(pattern = x) + ] + stopifnot( + "object not found or multiple objects found" = length(available) == 1 + ) + get_result( + available[[1]], base = base, verbose = verbose, project = project, ... + ) + gc(verbose = FALSE) + return(TRUE) + }, + base = base, verbose = verbose, project = project, ... + ) + order_manifest(manifest = x) |> + vapply( + FUN = function(hash, base, project, verbose, ...) { + get_result( + x = hash, base = base, project = project, ..., verbose = verbose + ) |> + list() + }, + FUN.VALUE = vector(mode = "list", length = 1), base = base, + verbose = verbose, project = project, ... + ) |> + do.call(what = combine) + } +) diff --git a/R/get_result_s3_object.R b/R/get_result_s3_object.R index d9d1b9aa..45cb59d8 100644 --- a/R/get_result_s3_object.R +++ b/R/get_result_s3_object.R @@ -14,7 +14,7 @@ setMethod( i <- 1 repeat { bucket_ok <- tryCatch( - s3saveRDS(x, bucket = base, object = target, multipart = TRUE), + s3saveRDS(result, bucket = base, object = target, multipart = TRUE), error = function(err) { err } diff --git a/man/get_result.Rd b/man/get_result.Rd index 83891dbb..d5134a94 100644 --- a/man/get_result.Rd +++ b/man/get_result.Rd @@ -9,7 +9,8 @@ \alias{get_result,character,s3_bucket-method} \alias{get_result,n2kInla,ANY-method} \alias{get_result,n2kModel,ANY-method} -\alias{get_result,n2kManifest,ANY-method} +\alias{get_result,n2kManifest,character-method} +\alias{get_result,n2kManifest,s3_bucket-method} \alias{get_result,s3_object,ANY-method} \title{Add the results from an analysis} \usage{ @@ -23,7 +24,9 @@ get_result(x, base, ...) \S4method{get_result}{n2kModel,ANY}(x, verbose = TRUE, ...) 
-\S4method{get_result}{n2kManifest,ANY}(x, base, ..., verbose = TRUE) +\S4method{get_result}{n2kManifest,character}(x, base, ..., verbose = TRUE) + +\S4method{get_result}{n2kManifest,s3_bucket}(x, base, project, ..., verbose = TRUE) \S4method{get_result}{s3_object,ANY}(x, base, ..., project, verbose = TRUE) } From 828dfe3a32e1076639cf32ebd1b48a3f0742e94a Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Fri, 7 Mar 2025 13:17:07 +0100 Subject: [PATCH 09/16] =?UTF-8?q?=F0=9F=90=9B=20Indirect=20fit=20on=20inla?= =?UTF-8?q?=20ignore=20compute=20settings?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- R/fit_model_n2k_inla.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/R/fit_model_n2k_inla.R b/R/fit_model_n2k_inla.R index 2fe03d1b..556ac0bd 100644 --- a/R/fit_model_n2k_inla.R +++ b/R/fit_model_n2k_inla.R @@ -114,7 +114,9 @@ direct_fit <- function(control, data, lc, timeout = NULL) { #' @importFrom assertthat assert_that is.number indirect_fit <- function(control, data, lc, response, timeout = NULL) { # first fit model without missing data + compute <- control$control.compute control$data <- data[!is.na(data[[response]]), ] + control$control.compute <- NULL m0 <- try({ if (!is.null(timeout)) { assert_that(is.number(timeout), timeout > 0) @@ -142,6 +144,7 @@ indirect_fit <- function(control, data, lc, response, timeout = NULL) { control$data <- data control$lincomb <- lc control$control.update <- list(result = m0) + control$control.compute <- compute try({ if (!is.null(timeout)) { assert_that(is.number(timeout), timeout > 0) From 3ede963b0fca79b16de6ced878fcd8cb51f64dd1 Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Tue, 1 Apr 2025 16:38:45 +0200 Subject: [PATCH 10/16] =?UTF-8?q?=F0=9F=9A=A8=20Fix=20indentation=20linter?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- R/fit_model_character.R | 2 +- R/fit_model_n2k_hurdle_imputed.R | 2 +- R/get_model_parameter_n2k_model_imputed.R | 9 +- R/get_result_n2k_inla.R | 24 +- R/n2k_aggregated.R | 13 +- R/n2k_analysis_metadata_class.R | 69 +- R/n2k_analysis_version_class.R | 32 +- R/n2k_anomaly_class.R | 12 +- R/n2k_contrast_class.R | 2 +- R/n2k_inla.R | 2 +- R/n2k_model_class.R | 16 +- R/n2k_model_imputed.R | 12 +- R/n2k_parameter_class.R | 61 +- R/n2k_result_class.R | 86 ++- R/n2k_spde.R | 2 +- R/select_factor_treshold.R | 15 +- R/store_manifest.R | 4 +- tests/testthat/test_aaa_n2k_manifest.R | 80 ++- .../test_aaa_select_factor_treshold.R | 4 +- tests/testthat/test_aba_n2k_inla.R | 240 ++++--- tests/testthat/test_aca_n2k_anomaly.R | 11 +- tests/testthat/test_caa_fit_model.R | 3 +- tests/testthat/test_daa_get_model_parameter.R | 667 +++++++++--------- 23 files changed, 717 insertions(+), 651 deletions(-) diff --git a/R/fit_model_character.R b/R/fit_model_character.R index 578de151..b9dba0ff 100644 --- a/R/fit_model_character.R +++ b/R/fit_model_character.R @@ -51,7 +51,7 @@ setMethod( dots <- list(...) 
if ( !has_name(dots, "local") || is.null(dots$local) || - !inherits(base, "s3_bucket") + !inherits(base, "s3_bucket") ) { analysis <- read_model(hash, base = base, project = project) display(verbose, paste(status(analysis), "-> "), FALSE) diff --git a/R/fit_model_n2k_hurdle_imputed.R b/R/fit_model_n2k_hurdle_imputed.R index edaf2571..be37d213 100644 --- a/R/fit_model_n2k_hurdle_imputed.R +++ b/R/fit_model_n2k_hurdle_imputed.R @@ -9,7 +9,7 @@ setMethod( signature = signature(x = "n2kHurdleImputed"), definition = function( x, base, project, status = c("new", "waiting"), ... -) { + ) { validObject(x) assert_that(is.character(status), length(status) >= 1) diff --git a/R/get_model_parameter_n2k_model_imputed.R b/R/get_model_parameter_n2k_model_imputed.R index 09808642..4c8df43d 100644 --- a/R/get_model_parameter_n2k_model_imputed.R +++ b/R/get_model_parameter_n2k_model_imputed.R @@ -21,8 +21,10 @@ setMethod( parent = parent$fingerprint, stringsAsFactors = FALSE ) %>% - mutate(fingerprint = map2_chr( - .data$parent, .data$description, ~sha1(c(.x, .y))) + mutate( + fingerprint = map2_chr( + .data$parent, .data$description, ~sha1(c(.x, .y)) + ) ) new( "n2kParameter", @@ -30,8 +32,7 @@ setMethod( ParameterEstimate = parameter %>% inner_join( analysis@Results %>% - mutate( - Parameter = as.character(.data$Parameter)), + mutate(Parameter = as.character(.data$Parameter)), by = c("description" = "Parameter") ) %>% transmute( diff --git a/R/get_result_n2k_inla.R b/R/get_result_n2k_inla.R index 8e1d7d28..5c51ad70 100644 --- a/R/get_result_n2k_inla.R +++ b/R/get_result_n2k_inla.R @@ -155,18 +155,18 @@ setMethod( random_id <- anomaly@Parameter %>% semi_join( anomaly@Parameter %>% - semi_join( - anomaly@Parameter %>% - semi_join( - data.frame( - description = "Random effect BLUP", - stringsAsFactors = FALSE - ), - by = "description" - ) %>% - mutate(description = y), - by = c("parent" = "fingerprint", "description") - ), + semi_join( + anomaly@Parameter %>% + semi_join( + data.frame( + description = "Random effect BLUP", + stringsAsFactors = FALSE + ), + by = "description" + ) %>% + mutate(description = y), + by = c("parent" = "fingerprint", "description") + ), by = c("parent" = "fingerprint") ) %>% select(-"parent", parameter = "fingerprint") diff --git a/R/n2k_aggregated.R b/R/n2k_aggregated.R index 38cf014d..71480e09 100644 --- a/R/n2k_aggregated.R +++ b/R/n2k_aggregated.R @@ -108,11 +108,14 @@ setMethod( } dots$parent_statusfingerprint <- sha1(dots$parent_status) } else { - if (is.null(dots[["parent_status"]])) { - stop( -"'parent_status' is required when 'parent_statusfingerprint' is provided" - ) - } + list(!is.null(dots[["parent_status"]])) |> + setNames( + paste( + "'parent_status' is required when 'parent_statusfingerprint' is", + "provided" + ) + ) |> + do.call(what = stopifnot) } analysis_relation <- data.frame( analysis = file_fingerprint, diff --git a/R/n2k_analysis_metadata_class.R b/R/n2k_analysis_metadata_class.R index c198c2a8..15ec997f 100644 --- a/R/n2k_analysis_metadata_class.R +++ b/R/n2k_analysis_metadata_class.R @@ -72,12 +72,14 @@ setValidity( assert_that( length(object@AnalysisFormula) == nrow(object@AnalysisMetadata), - msg = "Number of 'AnalysisFormula' not equal to number of 'AnalysisMetadata'" + msg = + "Number of 'AnalysisFormula' not equal to number of 'AnalysisMetadata'" ) if (inherits(object@AnalysisMetadata$formula, "character")) { - assert_that(isTRUE(all.equal( - lapply(object@AnalysisMetadata$formula, as.formula), - object@AnalysisFormula + assert_that( + 
isTRUE(all.equal( + lapply(object@AnalysisMetadata$formula, as.formula), + object@AnalysisFormula )), msg = "Formulas in 'AnalysisMetadata' don't match 'AnalysisFormula'" ) @@ -86,10 +88,11 @@ setValidity( levels(object@AnalysisMetadata$formula), as.formula ) - assert_that(isTRUE(all.equal( - formula_list[object@AnalysisMetadata$formula], - object@AnalysisFormula - )), + assert_that( + isTRUE(all.equal( + formula_list[object@AnalysisMetadata$formula], + object@AnalysisFormula + )), msg = "Formulas in 'AnalysisMetadata' don't match 'AnalysisFormula'" ) } @@ -120,29 +123,31 @@ setValidity( )) { stop("last_analysed_year cannot exceed last_imported_year") } - if (any( - object@AnalysisMetadata$duration > + list( + object@AnalysisMetadata$duration <= object@AnalysisMetadata$last_imported_year - - object@AnalysisMetadata$first_imported_year + 1 - )) { - stop( + object@AnalysisMetadata$first_imported_year + 1 + ) |> + setNames( paste( "duration longer than the interval from first_imported_year to", "last_imported_year" ) - ) - } + ) |> + do.call(what = stopifnot) - if (any( - object@AnalysisMetadata$last_analysed_year < + list( + object@AnalysisMetadata$last_analysed_year >= object@AnalysisMetadata$first_imported_year + - object@AnalysisMetadata$duration - 1 - )) { - stop( -"last_analysed_year smaller than first_imported_year + duration - 1. Window -outside imported range." - ) - } + object@AnalysisMetadata$duration - 1 + ) |> + setNames( + paste( + "`last_analysed_year` smaller than `first_imported_year + duration -", + "1`. Window outside imported range." + ) + ) |> + do.call(what = stopifnot) ok_status <- c( "new", "working", "waiting", "error", "converged", "false_convergence", @@ -164,15 +169,17 @@ outside imported range." stop("analysis_date must be in the past") } - if (!all( + list( object@AnalysisRelation$analysis %in% object@AnalysisMetadata$file_fingerprint - )) { - stop( -"Some Analysis in 'AnalysisRelation' slot have no matching file_fingerprint in -'Analysis' slot" - ) - } + ) |> + setNames( + paste( + "Some Analysis in 'AnalysisRelation' slot have no matching", + "`file_fingerprint` in 'Analysis' slot" + ) + ) |> + do.call(what = stopifnot) return(TRUE) } diff --git a/R/n2k_analysis_version_class.R b/R/n2k_analysis_version_class.R index b76623fe..01231abb 100644 --- a/R/n2k_analysis_version_class.R +++ b/R/n2k_analysis_version_class.R @@ -46,23 +46,27 @@ setValidity( name = "AnalysisVersionRPackage" ) - if (!all( + list(all( object@AnalysisVersionRPackage$analysis_version %in% object@AnalysisVersion$fingerprint - )) { - stop( -"Some AnalysisVersion in 'AnalysisVersionRPackage' slot are not present in -'AnalysisVersion' slot" - ) - } - if (!all( + )) |> + setNames( + paste( + "Some AnalysisVersion in 'AnalysisVersionRPackage' slot are not", + "present in 'AnalysisVersion' slot" + ) + ) |> + do.call(what = stopifnot) + list(all( object@AnalysisVersionRPackage$r_package %in% object@RPackage$fingerprint - )) { - stop( -"Some r_package in 'AnalysisVersionRPackage' slot are not present in -'RPackage' slot" - ) - } + )) |> + setNames( + paste( + "Some r_package in 'AnalysisVersionRPackage' slot are not present in", + "'RPackage' slot" + ) + ) |> + do.call(what = stopifnot) if (anyDuplicated(object@AnalysisVersionRPackage)) { stop("Duplicated rows in 'AnalysisVersionRPackage' slot") } diff --git a/R/n2k_anomaly_class.R b/R/n2k_anomaly_class.R index 02cbfd24..fef770c6 100644 --- a/R/n2k_anomaly_class.R +++ b/R/n2k_anomaly_class.R @@ -69,10 +69,14 @@ setValidity( antijoin_anomaly <- 
object@Anomaly %>% anti_join(object@ParameterEstimate, by = c("analysis", "parameter")) %>% nrow() - assert_that( - antijoin_anomaly == 0, msg = -"Mismatch on Analysis and Parameter between Anomaly and ParameterEstimate slot" - ) + list(antijoin_anomaly == 0) |> + setNames( + paste( + "Mismatch on `Analysis` and `Parameter` between `Anomaly` and", + "`ParameterEstimate` slot" + ) + ) |> + do.call(what = stopifnot) anomalytype_duplicate <- object@AnomalyType %>% select("fingerprint") %>% diff --git a/R/n2k_contrast_class.R b/R/n2k_contrast_class.R index 02092bb9..60d36fe8 100644 --- a/R/n2k_contrast_class.R +++ b/R/n2k_contrast_class.R @@ -60,7 +60,7 @@ setValidity( } if (!all( na.omit(object@ContrastEstimate$contrast) %in% - object@Contrast$fingerprint + object@Contrast$fingerprint )) { stop("Some contrast in 'ConstrastEstimate' slot not found") } diff --git a/R/n2k_inla.R b/R/n2k_inla.R index 85332caa..9e3c28d9 100644 --- a/R/n2k_inla.R +++ b/R/n2k_inla.R @@ -88,7 +88,7 @@ setMethod( assert_that(is.time(analysis_date)) assert_that( is.null(lin_comb) || inherits(lin_comb, "list") || - (inherits(lin_comb, "matrix") && length(dim(lin_comb) == 2)), + (inherits(lin_comb, "matrix") && length(dim(lin_comb) == 2)), msg = "lin_comb must be either a list or a matrix" ) assert_that(is.list(replicate_name)) diff --git a/R/n2k_model_class.R b/R/n2k_model_class.R index 5873f095..de13c83a 100644 --- a/R/n2k_model_class.R +++ b/R/n2k_model_class.R @@ -23,16 +23,14 @@ setValidity( if (nrow(object@AnalysisMetadata) != 1) { stop("The 'AnalysisMetadata' slot must contain exactly one row") } - if (nrow(object@AnalysisRelation) > 0) { - if (any( - object@AnalysisRelation$analysis != - object@AnalysisMetadata$file_fingerprint - )) { - stop( - "Some Analysis in 'AnalysisRelation' slot don't match file_fingerprint" + stopifnot( + "Some Analysis in 'AnalysisRelation' slot don't match file_fingerprint" = + nrow(object@AnalysisRelation) == 0 || + all( + object@AnalysisRelation$analysis == + object@AnalysisMetadata$file_fingerprint ) - } - } + ) return(TRUE) } ) diff --git a/R/n2k_model_imputed.R b/R/n2k_model_imputed.R index dfbc3ac0..816c822c 100644 --- a/R/n2k_model_imputed.R +++ b/R/n2k_model_imputed.R @@ -106,10 +106,14 @@ setMethod( dots$parent_status <- coalesce(dots$parent_status, "waiting") dots$parent_statusfingerprint <- sha1(dots$parent_status) } - stopifnot( -"'parent_status' is required when 'parent_statusfingerprint' is provided" = -!is.null(dots[["parent_status"]]) - ) + list(!is.null(dots[["parent_status"]])) |> + setNames( + paste( + "'parent_status' is required when 'parent_statusfingerprint' is", + "provided" + ) + ) |> + do.call(what = stopifnot) analysis_relation <- data.frame( analysis = file_fingerprint, parent_analysis = dots$parent, parentstatus_fingerprint = dots$parent_statusfingerprint, diff --git a/R/n2k_parameter_class.R b/R/n2k_parameter_class.R index d74aff17..e11b0a12 100644 --- a/R/n2k_parameter_class.R +++ b/R/n2k_parameter_class.R @@ -51,14 +51,17 @@ setValidity( )) { stop("Some parent in 'Parameter' slot not found") } - if (!all( + all( object@ParameterEstimate$parameter %in% object@Parameter$fingerprint - )) { - stop( -"Some parameter in 'ParameterEstimate' slot have no matching fingerprint in -'Parameter' slot" - ) - } + ) |> + list() |> + setNames( + paste( + "Some parameter in 'ParameterEstimate' slot have no matching", + "fingerprint in 'Parameter' slot" + ) + ) |> + do.call(what = stopifnot) if (anyDuplicated(object@Parameter$fingerprint)) { stop("Duplicated fingerprint in 
'Parameter' slot") } @@ -69,33 +72,27 @@ setValidity( stop("Duplicated rows in 'ParameterEstimate' slot") } - if (nrow(object@ParameterEstimate) > 0) { - test <- object@ParameterEstimate %>% - summarise( - test_lcl = any( - .data$estimate - .data$lower_confidence_limit < - -.Machine$double.neg.eps, - na.rm = TRUE - ), - test_ucl = any( - .data$estimate - .data$upper_confidence_limit > - .Machine$double.neg.eps, - na.rm = TRUE - ) - ) - if (test$test_lcl) { - stop( - "All estimate in 'ParameterEstimate' slot must be greather than the - lower_confidence_limit" + if (nrow(object@ParameterEstimate) == 0) { + return(TRUE) + } + test <- object@ParameterEstimate %>% + summarise( + test_lcl = all( + .data$estimate - .data$lower_confidence_limit >= 0, na.rm = TRUE + ), + test_ucl = all( + .data$upper_confidence_limit - .data$estimate >= 0, na.rm = TRUE ) - } - if (test$test_ucl) { - stop( - "All estimate in 'ParameterEstimate' slot must be less than the - upper_confidence_limit" + ) + list(test$test_lcl, test$test_ucl) |> + setNames( + paste( + "All estimates in 'ParameterEstimate' slot must be", + c("greather", "less"), "than the", + c("`lower_confidence_limit`", "`upper_confidence_limit`") ) - } - } + ) |> + do.call(what = stopifnot) return(TRUE) } ) diff --git a/R/n2k_result_class.R b/R/n2k_result_class.R index b5ea0935..c86982e7 100644 --- a/R/n2k_result_class.R +++ b/R/n2k_result_class.R @@ -17,45 +17,63 @@ setClass( setValidity( "n2kResult", function(object) { - if (!all( + all( object@AnalysisMetadata$analysis_version %in% object@AnalysisVersion$fingerprint - )) { - stop( -"Some analysis_version in 'AnalysisMetadata' slot are not present in -'AnalysisVersion' slot" - ) - } - if (!all( -object@ParameterEstimate$analysis %in% object@AnalysisMetadata$file_fingerprint - )) { - stop( -"Some Analysis in 'ParameterEstimate' slot are not present in 'AnalysisMetadata' -slot" - ) - } - if (!all( + ) |> + list() |> + setNames( + paste( + "Some analysis_version in 'AnalysisMetadata' slot are not present in", + "'AnalysisVersion' slot" + ) + ) |> + do.call(what = stopifnot) + all( + object@ParameterEstimate$analysis %in% + object@AnalysisMetadata$file_fingerprint + ) |> + list() |> + setNames( + paste( + "Some Analysis in 'ParameterEstimate' slot are not present in", + "'AnalysisMetadata' slot" + ) + ) |> + do.call(what = stopifnot) + all( object@Anomaly$analysis %in% object@AnalysisMetadata$file_fingerprint - )) { - stop( -"Some Analysis in 'Anomaly' slot are not present in 'AnalysisMetadata' slot" - ) - } - if (!all( + ) |> + list() |> + setNames( + paste( + "Some Analysis in 'Anomaly' slot are not present in", + "'AnalysisMetadata' slot" + ) + ) |> + do.call(what = stopifnot) + all( object@ContrastCoefficient$parameter %in% object@Parameter$fingerprint - )) { - stop( -"Some Parameter in 'ContrastCoefficient' slot are not present in 'Parameter' -slot" - ) - } - if (!all( + ) |> + list() |> + setNames( + paste( + "Some Parameter in 'ContrastCoefficient' slot are not present in", + "'Parameter' slot" + ) + ) |> + do.call(what = stopifnot) + all( object@Contrast$analysis %in% object@AnalysisMetadata$file_fingerprint - )) { - stop( -"Some Analysis in 'Contrast' slot are not present in 'AnalysisMetadata' slot" - ) - } + ) |> + list() |> + setNames( + paste( + "Some Analysis in 'Contrast' slot are not present in", + "'AnalysisMetadata' slot" + ) + ) |> + do.call(what = stopifnot) return(TRUE) } ) diff --git a/R/n2k_spde.R b/R/n2k_spde.R index cc7b9609..ca0cb8d7 100644 --- a/R/n2k_spde.R +++ b/R/n2k_spde.R @@ 
-75,7 +75,7 @@ setMethod( assert_that(is.time(analysis_date)) assert_that( is.null(lin_comb) || inherits(lin_comb, "list") || - (inherits(lin_comb, "matrix") && length(dim(lin_comb) == 2)), + (inherits(lin_comb, "matrix") && length(dim(lin_comb) == 2)), msg = "lin_comb must be either a list or a matrix" ) assert_that(is.list(replicate_name)) diff --git a/R/select_factor_treshold.R b/R/select_factor_treshold.R index 769faa3a..7df5bfbf 100644 --- a/R/select_factor_treshold.R +++ b/R/select_factor_treshold.R @@ -29,12 +29,15 @@ select_factor_threshold <- function(observation, variable, threshold) { if (!is.factor(variable_factor)) { variable_factor <- factor(variable_factor) } - if (nrow(observation) < 2 * length(levels(variable_factor))) { - stop( -"The number of observations much be at least twice the number of levels in ", -variable - ) - } + + list(nrow(observation) >= 2 * length(levels(variable_factor))) |> + setNames( + paste( + "The number of observations much be at least twice the number of", + "levels in", variable + ) + ) |> + do.call(what = stopifnot) model <- glm.nb(observation$Count ~ 0 + variable_factor) log_threshold <- max(coef(model)) + log(threshold) diff --git a/R/store_manifest.R b/R/store_manifest.R index 4693f0f1..6dbcbdcb 100644 --- a/R/store_manifest.R +++ b/R/store_manifest.R @@ -37,8 +37,8 @@ setMethod( #test if file exists fingerprint <- get_file_fingerprint(x) filename <- list.files( - dir, pattern = sprintf("%s.manifest$", fingerprint), full.names = TRUE - ) + dir, pattern = sprintf("%s.manifest$", fingerprint), full.names = TRUE + ) if (!overwrite && length(filename) > 0) { return(normalizePath(filename, winslash = "/")) } diff --git a/tests/testthat/test_aaa_n2k_manifest.R b/tests/testthat/test_aaa_n2k_manifest.R index 8a2260c3..841cfff0 100644 --- a/tests/testthat/test_aaa_n2k_manifest.R +++ b/tests/testthat/test_aaa_n2k_manifest.R @@ -46,45 +46,47 @@ test_that("n2k_manifest checks the fingerprint", { }) test_that( - "n2k_manifest checks the correct link between parent and fingerprint", { - manifest <- data.frame( - fingerprint = "1", parent = "2", stringsAsFactors = FALSE - ) - expect_error( - new("n2kManifest", Manifest = manifest, Fingerprint = sha1(manifest)), - "All rows have parents" - ) - manifest <- data.frame( - fingerprint = c("1", "2"), parent = c(NA, "3"), stringsAsFactors = FALSE - ) - expect_error( - new("n2kManifest", Manifest = manifest, Fingerprint = sha1(manifest)), - "Some parent in 'Manifest' slot have no matching fingerprint" - ) - manifest <- data.frame( - fingerprint = c("1", "2"), parent = c(NA, "2"), stringsAsFactors = FALSE - ) - expect_error( - new("n2kManifest", Manifest = manifest, Fingerprint = sha1(manifest)), - "Self references between parent and fingerprint" - ) - manifest <- data.frame( - fingerprint = c("1", "2", "3"), parent = c(NA, "3", "2"), - stringsAsFactors = FALSE - ) - expect_error( - new("n2kManifest", Manifest = manifest, Fingerprint = sha1(manifest)), - "Too many parent - child levels" - ) - manifest <- data.frame( - fingerprint = as.character(seq(1, 20)), parent = as.character(c(NA, 1:19)), - stringsAsFactors = FALSE - ) - expect_error( - new("n2kManifest", Manifest = manifest, Fingerprint = sha1(manifest)), - "Too many parent - child levels" - ) -}) + "n2k_manifest checks the correct link between parent and fingerprint", + { + manifest <- data.frame( + fingerprint = "1", parent = "2", stringsAsFactors = FALSE + ) + expect_error( + new("n2kManifest", Manifest = manifest, Fingerprint = sha1(manifest)), + "All rows 
have parents" + ) + manifest <- data.frame( + fingerprint = c("1", "2"), parent = c(NA, "3"), stringsAsFactors = FALSE + ) + expect_error( + new("n2kManifest", Manifest = manifest, Fingerprint = sha1(manifest)), + "Some parent in 'Manifest' slot have no matching fingerprint" + ) + manifest <- data.frame( + fingerprint = c("1", "2"), parent = c(NA, "2"), stringsAsFactors = FALSE + ) + expect_error( + new("n2kManifest", Manifest = manifest, Fingerprint = sha1(manifest)), + "Self references between parent and fingerprint" + ) + manifest <- data.frame( + fingerprint = c("1", "2", "3"), parent = c(NA, "3", "2"), + stringsAsFactors = FALSE + ) + expect_error( + new("n2kManifest", Manifest = manifest, Fingerprint = sha1(manifest)), + "Too many parent - child levels" + ) + manifest <- data.frame( + fingerprint = as.character(seq(1, 20)), + parent = as.character(c(NA, 1:19)), stringsAsFactors = FALSE + ) + expect_error( + new("n2kManifest", Manifest = manifest, Fingerprint = sha1(manifest)), + "Too many parent - child levels" + ) + } +) test_that("n2kManifest generates the object", { manifest <- data.frame( diff --git a/tests/testthat/test_aaa_select_factor_treshold.R b/tests/testthat/test_aaa_select_factor_treshold.R index fa675355..81c9ef97 100644 --- a/tests/testthat/test_aaa_select_factor_treshold.R +++ b/tests/testthat/test_aaa_select_factor_treshold.R @@ -30,8 +30,8 @@ describe("select_factor_threshold", { ), throws_error( paste( -"The number of observations much be at least twice the number of levels in", -variable + "The number of observations much be at least twice the number of", + "levels in", variable ) ) ) diff --git a/tests/testthat/test_aba_n2k_inla.R b/tests/testthat/test_aba_n2k_inla.R index e0cbac99..f3b43efb 100644 --- a/tests/testthat/test_aba_n2k_inla.R +++ b/tests/testthat/test_aba_n2k_inla.R @@ -304,23 +304,25 @@ test_that("n2k_inla() checks that last_imported_year is from the past", { }) test_that( "n2k_inla() checks that last_imported_year is not earlier than - first_imported_year ", { - expect_that( - n2k_inla( - data = dataset, - result_datasource_id = this_result_datasource_id, - species_group_id = this_species_group_id, - location_group_id = this_location_group_id, - model_type = this_model_type, - formula = this_formula, - first_imported_year = 2000, - last_imported_year = 1999, - analysis_date = this_analysis_date, - scheme_id = this_scheme_id - ), - throws_error("first_imported_year cannot exceed last_imported_year") - ) -}) + first_imported_year", + { + expect_that( + n2k_inla( + data = dataset, + result_datasource_id = this_result_datasource_id, + species_group_id = this_species_group_id, + location_group_id = this_location_group_id, + model_type = this_model_type, + formula = this_formula, + first_imported_year = 2000, + last_imported_year = 1999, + analysis_date = this_analysis_date, + scheme_id = this_scheme_id + ), + throws_error("first_imported_year cannot exceed last_imported_year") + ) + } +) test_that("n2k_inla() sets the correct duration", { expect_that( @@ -391,45 +393,47 @@ test_that("n2k_inla() converts numeric duration, when possible", { ) }) test_that( -"n2k_inla() checks that duration is not outside the FirstImportYear - -last_imported_year ranges", { - expect_that( - n2k_inla( - data = dataset, - result_datasource_id = this_result_datasource_id, - species_group_id = this_species_group_id, - location_group_id = this_location_group_id, - model_type = this_model_type, - formula = this_formula, - first_imported_year = 1999, - last_imported_year = 1999, 
- duration = 2, - analysis_date = this_analysis_date, - scheme_id = this_scheme_id - ), - throws_error( - "duration longer than the interval from first_imported_year to" + "n2k_inla() checks that duration is not outside the FirstImportYear - + last_imported_year ranges", + { + expect_that( + n2k_inla( + data = dataset, + result_datasource_id = this_result_datasource_id, + species_group_id = this_species_group_id, + location_group_id = this_location_group_id, + model_type = this_model_type, + formula = this_formula, + first_imported_year = 1999, + last_imported_year = 1999, + duration = 2, + analysis_date = this_analysis_date, + scheme_id = this_scheme_id + ), + throws_error( + "duration longer than the interval from first_imported_year to" + ) ) - ) - expect_that( - n2k_inla( - data = dataset, - result_datasource_id = this_result_datasource_id, - species_group_id = this_species_group_id, - location_group_id = this_location_group_id, - model_type = this_model_type, - formula = this_formula, - first_imported_year = 1999, - last_imported_year = 1999, - duration = 0, - analysis_date = this_analysis_date, - scheme_id = this_scheme_id - ), - throws_error( - "duration is not a count \\(a single positive integer\\)" + expect_that( + n2k_inla( + data = dataset, + result_datasource_id = this_result_datasource_id, + species_group_id = this_species_group_id, + location_group_id = this_location_group_id, + model_type = this_model_type, + formula = this_formula, + first_imported_year = 1999, + last_imported_year = 1999, + duration = 0, + analysis_date = this_analysis_date, + scheme_id = this_scheme_id + ), + throws_error( + "duration is not a count \\(a single positive integer\\)" + ) ) - ) -}) + } +) test_that("sets the correct last_analysed_year", { expect_that( @@ -536,7 +540,7 @@ test_that("n2k_inla() checks that last_analysed_year is within the range", { analysis_date = this_analysis_date, scheme_id = this_scheme_id ), - throws_error("last_analysed_year smaller than first_imported_year") + throws_error("`last_analysed_year` smaller than `first_imported_year") ) }) @@ -558,68 +562,70 @@ test_that("n2k_inla() checks if analysis date is from the past", { ) }) test_that( - "n2k_inla() checks if all variable in formula are available in the data", { - expect_that( - n2k_inla( - data = dataset[, c("A", "B", "C", "D", "E", "G")], - result_datasource_id = this_result_datasource_id, - species_group_id = this_species_group_id, - location_group_id = this_location_group_id, - model_type = this_model_type, - formula = this_formula, - first_imported_year = this_first_imported_year, - last_imported_year = this_last_imported_year, - analysis_date = this_analysis_date, - scheme_id = this_scheme_id - ), - throws_error("Missing variable `Count` in Data slot") - ) - expect_that( - n2k_inla( - data = dataset[, c("Count", "B", "C", "D", "E", "G")], - result_datasource_id = this_result_datasource_id, - species_group_id = this_species_group_id, - location_group_id = this_location_group_id, - model_type = this_model_type, - formula = this_formula, - first_imported_year = this_first_imported_year, - last_imported_year = this_last_imported_year, - analysis_date = this_analysis_date, - scheme_id = this_scheme_id - ), - throws_error("Missing variable `A` in Data slot") - ) - expect_that( - n2k_inla( - data = dataset[, c("Count", "A", "B", "C", "D", "E")], - result_datasource_id = this_result_datasource_id, - species_group_id = this_species_group_id, - location_group_id = this_location_group_id, - model_type = this_model_type, - 
formula = this_formula, - first_imported_year = this_first_imported_year, - last_imported_year = this_last_imported_year, - analysis_date = this_analysis_date, - scheme_id = this_scheme_id - ), - throws_error("Missing variable `G` in Data slot") - ) - expect_that( - n2k_inla( - data = dataset[, c("A", "B", "C", "Count", "E", "G")], - result_datasource_id = this_result_datasource_id, - species_group_id = this_species_group_id, - location_group_id = this_location_group_id, - model_type = this_model_type, - formula = this_formula, - first_imported_year = this_first_imported_year, - last_imported_year = this_last_imported_year, - analysis_date = this_analysis_date, - scheme_id = this_scheme_id - ), - throws_error("Missing variable `D` in Data slot") - ) -}) + "n2k_inla() checks if all variable in formula are available in the data", + { + expect_that( + n2k_inla( + data = dataset[, c("A", "B", "C", "D", "E", "G")], + result_datasource_id = this_result_datasource_id, + species_group_id = this_species_group_id, + location_group_id = this_location_group_id, + model_type = this_model_type, + formula = this_formula, + first_imported_year = this_first_imported_year, + last_imported_year = this_last_imported_year, + analysis_date = this_analysis_date, + scheme_id = this_scheme_id + ), + throws_error("Missing variable `Count` in Data slot") + ) + expect_that( + n2k_inla( + data = dataset[, c("Count", "B", "C", "D", "E", "G")], + result_datasource_id = this_result_datasource_id, + species_group_id = this_species_group_id, + location_group_id = this_location_group_id, + model_type = this_model_type, + formula = this_formula, + first_imported_year = this_first_imported_year, + last_imported_year = this_last_imported_year, + analysis_date = this_analysis_date, + scheme_id = this_scheme_id + ), + throws_error("Missing variable `A` in Data slot") + ) + expect_that( + n2k_inla( + data = dataset[, c("Count", "A", "B", "C", "D", "E")], + result_datasource_id = this_result_datasource_id, + species_group_id = this_species_group_id, + location_group_id = this_location_group_id, + model_type = this_model_type, + formula = this_formula, + first_imported_year = this_first_imported_year, + last_imported_year = this_last_imported_year, + analysis_date = this_analysis_date, + scheme_id = this_scheme_id + ), + throws_error("Missing variable `G` in Data slot") + ) + expect_that( + n2k_inla( + data = dataset[, c("A", "B", "C", "Count", "E", "G")], + result_datasource_id = this_result_datasource_id, + species_group_id = this_species_group_id, + location_group_id = this_location_group_id, + model_type = this_model_type, + formula = this_formula, + first_imported_year = this_first_imported_year, + last_imported_year = this_last_imported_year, + analysis_date = this_analysis_date, + scheme_id = this_scheme_id + ), + throws_error("Missing variable `D` in Data slot") + ) + } +) object_model <- n2k_inla( data = object, model_fit = model_object, status = "converged" diff --git a/tests/testthat/test_aca_n2k_anomaly.R b/tests/testthat/test_aca_n2k_anomaly.R index 2c775729..f2656e37 100644 --- a/tests/testthat/test_aca_n2k_anomaly.R +++ b/tests/testthat/test_aca_n2k_anomaly.R @@ -36,9 +36,9 @@ test_that("n2kAnomaly", { ) ) parameter <- expand.grid( - description = seq_len(10), parent = parameter$description, - stringsAsFactors = FALSE - ) %>% + description = seq_len(10), parent = parameter$description, + stringsAsFactors = FALSE + ) %>% mutate( description = ifelse( grepl("letters", .data$parent), LETTERS[.data$description], @@ 
-133,7 +133,10 @@ test_that("n2kAnomaly", { "n2kAnomaly", Parameter = parameter, AnomalyType = anomalytype, Anomaly = anomaly ), -"Mismatch on Analysis and Parameter between Anomaly and ParameterEstimate slot" + paste( + "Mismatch on `Analysis` and `Parameter` between `Anomaly` and", + "`ParameterEstimate` slot" + ) ) expect_error( new( diff --git a/tests/testthat/test_caa_fit_model.R b/tests/testthat/test_caa_fit_model.R index a5839e1d..e216454a 100644 --- a/tests/testthat/test_caa_fit_model.R +++ b/tests/testthat/test_caa_fit_model.R @@ -279,7 +279,8 @@ test_that("fit_model() works on n2kInlaComposite", { transmute( .data$value, estimate = .data$mean, - variance = .data$sd ^ 2) + variance = .data$sd ^ 2 + ) } ) diff --git a/tests/testthat/test_daa_get_model_parameter.R b/tests/testthat/test_daa_get_model_parameter.R index 80897522..3aff6242 100644 --- a/tests/testthat/test_daa_get_model_parameter.R +++ b/tests/testthat/test_daa_get_model_parameter.R @@ -1,351 +1,366 @@ test_that( "n2kInla with categorical and numeric fixed effect without random - effect", { - dataset <- test_data() - this_analysis_date <- as.POSIXct("2015-01-01 04:05:06.12", tz = "UTC") - this_result_datasource_id <- sha1(sample(letters)) - this_scheme_id <- sha1(sample(letters)) - this_species_group_id <- sha1(sample(letters)) - this_location_group_id <- sha1(sample(letters)) - this_seed <- 4L - this_model_type <- "inla nbinomial: A * B + E" - this_first_imported_year <- 1990L - this_last_imported_year <- 2015L - this_last_analysed_year <- 2015L - this_parent <- "abcdef" - this_duration <- this_last_imported_year - this_first_imported_year + 1 - analysis <- n2k_inla( - data = dataset, formula = "Count ~ A + C", - result_datasource_id = this_result_datasource_id, - scheme_id = this_scheme_id, species_group_id = this_species_group_id, - location_group_id = this_location_group_id, family = "nbinomial", - model_type = this_model_type, - first_imported_year = this_first_imported_year, - last_imported_year = this_last_imported_year, - last_analysed_year = this_last_analysed_year, - analysis_date = this_analysis_date, seed = this_seed, parent = this_parent, - duration = this_duration - ) - expect_is(param <- get_model_parameter(analysis), "n2kParameter") - expect_identical(nrow(param@Parameter), 0L) - expect_identical(nrow(param@ParameterEstimate), 0L) + effect", + { + dataset <- test_data() + this_analysis_date <- as.POSIXct("2015-01-01 04:05:06.12", tz = "UTC") + this_result_datasource_id <- sha1(sample(letters)) + this_scheme_id <- sha1(sample(letters)) + this_species_group_id <- sha1(sample(letters)) + this_location_group_id <- sha1(sample(letters)) + this_seed <- 4L + this_model_type <- "inla nbinomial: A * B + E" + this_first_imported_year <- 1990L + this_last_imported_year <- 2015L + this_last_analysed_year <- 2015L + this_parent <- "abcdef" + this_duration <- this_last_imported_year - this_first_imported_year + 1 + analysis <- n2k_inla( + data = dataset, formula = "Count ~ A + C", + result_datasource_id = this_result_datasource_id, + scheme_id = this_scheme_id, species_group_id = this_species_group_id, + location_group_id = this_location_group_id, family = "nbinomial", + model_type = this_model_type, + first_imported_year = this_first_imported_year, + last_imported_year = this_last_imported_year, + last_analysed_year = this_last_analysed_year, + analysis_date = this_analysis_date, seed = this_seed, + parent = this_parent, duration = this_duration + ) + expect_is(param <- get_model_parameter(analysis), "n2kParameter") + 
expect_identical(nrow(param@Parameter), 0L) + expect_identical(nrow(param@ParameterEstimate), 0L) - analysis <- fit_model(analysis) - expect_message( - param <- get_model_parameter(analysis, verbose = TRUE), - "reading model parameters" - ) - expect_is(param, "n2kParameter") - expect_identical( - param@Parameter %>% - semi_join( - tibble(description = "Random effect BLUP"), by = "description" - ) %>% - inner_join(param@Parameter, by = c("fingerprint" = "parent")) %>% - nrow(), - 0L - ) - expect_identical( - param@Parameter %>% + analysis <- fit_model(analysis) + expect_message( + param <- get_model_parameter(analysis, verbose = TRUE), + "reading model parameters" + ) + expect_is(param, "n2kParameter") + expect_identical( + param@Parameter %>% + semi_join( + tibble(description = "Random effect BLUP"), by = "description" + ) %>% + inner_join(param@Parameter, by = c("fingerprint" = "parent")) %>% + nrow(), + 0L + ) + expect_identical( + param@Parameter %>% + semi_join( + tibble(description = "Random effect variance"), by = "description" + ) %>% + inner_join(param@Parameter, by = c("fingerprint" = "parent")) %>% + nrow(), + 0L + ) + fixed <- param@Parameter %>% semi_join( - tibble(description = "Random effect variance"), by = "description" + param@Parameter %>% + semi_join(tibble(description = "Fixed effect"), by = "description"), + by = c("parent" = "fingerprint") ) %>% - inner_join(param@Parameter, by = c("fingerprint" = "parent")) %>% - nrow(), - 0L - ) - fixed <- param@Parameter %>% - semi_join( - param@Parameter %>% - semi_join(tibble(description = "Fixed effect"), by = "description"), - by = c("parent" = "fingerprint") - ) %>% - select("fingerprint", main = "description") %>% - left_join(param@Parameter, by = c("fingerprint" = "parent")) - expect_identical(nrow(fixed), 4L) - expect_identical(fixed$main == "A", !is.na(fixed$description)) - expect_identical(fixed$main == "A", !is.na(fixed$fingerprint.y)) -}) + select("fingerprint", main = "description") %>% + left_join(param@Parameter, by = c("fingerprint" = "parent")) + expect_identical(nrow(fixed), 4L) + expect_identical(fixed$main == "A", !is.na(fixed$description)) + expect_identical(fixed$main == "A", !is.na(fixed$fingerprint.y)) + } +) test_that( "n2kInla with single random effect, categorical-categorical - interaction and categorical numeric interaction", { - dataset <- test_data() - this_analysis_date <- as.POSIXct("2015-01-01 04:05:06.12", tz = "UTC") - this_result_datasource_id <- sha1(sample(letters)) - this_scheme_id <- sha1(sample(letters)) - this_species_group_id <- sha1(sample(letters)) - this_location_group_id <- sha1(sample(letters)) - this_seed <- 4L - this_model_type <- "inla nbinomial: A * B + E" - this_first_imported_year <- 1990L - this_last_imported_year <- 2015L - this_last_analysed_year <- 2015L - this_parent <- "abcdef" - this_duration <- this_last_imported_year - this_first_imported_year + 1 - analysis <- n2k_inla( - formula = "Count ~ 0 + A * C + A * B + f(E, model = \"iid\")", - result_datasource_id = this_result_datasource_id, - scheme_id = this_scheme_id, species_group_id = this_species_group_id, - location_group_id = this_location_group_id, family = "nbinomial", - model_type = this_model_type, - first_imported_year = this_first_imported_year, - last_imported_year = this_last_imported_year, - last_analysed_year = this_last_analysed_year, - analysis_date = this_analysis_date, seed = this_seed, data = dataset, - parent = this_parent, duration = this_duration - ) - analysis <- fit_model(analysis) - 
expect_message( - param <- get_model_parameter(analysis, verbose = TRUE), - "reading model parameters: fixed effects" - ) - expect_is(param, "n2kParameter") - expect_identical( - param@Parameter %>% - semi_join( - tibble(description = "Random effect variance"), by = "description" - ) %>% - inner_join(param@Parameter, by = c("fingerprint" = "parent")) %>% - nrow(), - 1L - ) - fixed <- param@Parameter %>% - semi_join( - param@Parameter %>% - semi_join( - tibble(description = "Fixed effect"), - by = "description" - ), - by = c("parent" = "fingerprint") - ) %>% - select("fingerprint", main = "description") %>% - left_join( - param@Parameter, - by = c("fingerprint" = "parent") + interaction and categorical numeric interaction", + { + dataset <- test_data() + this_analysis_date <- as.POSIXct("2015-01-01 04:05:06.12", tz = "UTC") + this_result_datasource_id <- sha1(sample(letters)) + this_scheme_id <- sha1(sample(letters)) + this_species_group_id <- sha1(sample(letters)) + this_location_group_id <- sha1(sample(letters)) + this_seed <- 4L + this_model_type <- "inla nbinomial: A * B + E" + this_first_imported_year <- 1990L + this_last_imported_year <- 2015L + this_last_analysed_year <- 2015L + this_parent <- "abcdef" + this_duration <- this_last_imported_year - this_first_imported_year + 1 + analysis <- n2k_inla( + formula = "Count ~ 0 + A * C + A * B + f(E, model = \"iid\")", + result_datasource_id = this_result_datasource_id, + scheme_id = this_scheme_id, species_group_id = this_species_group_id, + location_group_id = this_location_group_id, family = "nbinomial", + model_type = this_model_type, + first_imported_year = this_first_imported_year, + last_imported_year = this_last_imported_year, + last_analysed_year = this_last_analysed_year, + analysis_date = this_analysis_date, seed = this_seed, data = dataset, + parent = this_parent, duration = this_duration ) - expect_identical(nrow(fixed), 9L) - expect_identical(grepl(":", fixed$main), grepl(":", fixed$description)) - expect_identical(fixed$main == "C", is.na(fixed$fingerprint.y)) - expect_identical(fixed$main == "A:C", grepl(".+:$", fixed$description)) - expect_identical(fixed$main == "A:B", grepl(".+:.+$", fixed$description)) - random <- param@Parameter %>% - semi_join( + analysis <- fit_model(analysis) + expect_message( + param <- get_model_parameter(analysis, verbose = TRUE), + "reading model parameters: fixed effects" + ) + expect_is(param, "n2kParameter") + expect_identical( param@Parameter %>% semi_join( - tibble(description = "Random effect BLUP"), by = "description" - ), - by = c("parent" = "fingerprint") - ) %>% - select("fingerprint", Main = "description") %>% - left_join( - param@Parameter %>% - rename(finger = "fingerprint", level = "description"), - by = c("fingerprint" = "parent") - ) %>% - left_join( - param@Parameter %>% - select("parent", Level2 = "description"), - by = c("finger" = "parent") + tibble(description = "Random effect variance"), by = "description" + ) %>% + inner_join(param@Parameter, by = c("fingerprint" = "parent")) %>% + nrow(), + 1L ) - expect_false(any(is.na(random$level))) - expect_true(all(is.na(random$level2))) - expect_identical( - dataset$E %>% - unique() %>% - sort() %>% - as.character(), - random$level - ) - expect_identical( - param@ParameterEstimate %>% - inner_join( - random, - by = c("parameter" = "finger") + fixed <- param@Parameter %>% + semi_join( + param@Parameter %>% + semi_join( + tibble(description = "Fixed effect"), + by = "description" + ), + by = c("parent" = "fingerprint") ) %>% - nrow(), 
- nrow(random) - ) -}) - -test_that( - "n2kInla with numeric-numeric interaction and two random effects of - which on replicated", { - dataset <- test_data() - this_analysis_date <- as.POSIXct("2015-01-01 04:05:06.12", tz = "UTC") - this_result_datasource_id <- sha1(sample(letters)) - this_scheme_id <- sha1(sample(letters)) - this_species_group_id <- sha1(sample(letters)) - this_location_group_id <- sha1(sample(letters)) - this_seed <- 4L - this_model_type <- "inla nbinomial: A * B + E" - this_first_imported_year <- 1990L - this_last_imported_year <- 2015L - this_last_analysed_year <- 2015L - this_parent <- "abcdef" - this_duration <- this_last_imported_year - this_first_imported_year + 1 - analysis <- n2k_inla( - formula = "Count ~ C * D + - f(E, model = \"rw1\", replicate = as.integer(A))", - result_datasource_id = this_result_datasource_id, - scheme_id = this_scheme_id, species_group_id = this_species_group_id, - location_group_id = this_location_group_id, family = "nbinomial", - model_type = this_model_type, - first_imported_year = this_first_imported_year, - last_imported_year = this_last_imported_year, - last_analysed_year = this_last_analysed_year, - analysis_date = this_analysis_date, seed = this_seed, data = dataset, - parent = this_parent, duration = this_duration - ) - analysis <- fit_model(analysis) - expect_message( - param <- get_model_parameter(analysis, verbose = TRUE), - "reading model parameters: fixed effects" - ) - expect_is(param, "n2kParameter") - expect_identical( - param@Parameter %>% + select("fingerprint", main = "description") %>% + left_join( + param@Parameter, + by = c("fingerprint" = "parent") + ) + expect_identical(nrow(fixed), 9L) + expect_identical(grepl(":", fixed$main), grepl(":", fixed$description)) + expect_identical(fixed$main == "C", is.na(fixed$fingerprint.y)) + expect_identical(fixed$main == "A:C", grepl(".+:$", fixed$description)) + expect_identical(fixed$main == "A:B", grepl(".+:.+$", fixed$description)) + random <- param@Parameter %>% semi_join( - tibble(description = "Random effect variance"), by = "description" + param@Parameter %>% + semi_join( + tibble(description = "Random effect BLUP"), by = "description" + ), + by = c("parent" = "fingerprint") ) %>% - inner_join(param@Parameter, by = c("fingerprint" = "parent")) %>% - nrow(), - 1L - ) - fixed <- param@Parameter %>% - semi_join( - param@Parameter %>% - semi_join(tibble(description = "Fixed effect"), by = "description"), - by = c("parent" = "fingerprint") - ) %>% - select("fingerprint", main = "description") %>% - left_join(param@Parameter, by = c("fingerprint" = "parent")) - expect_identical(nrow(fixed), 4L) - grepl(":", fixed$description) %>% - any() %>% - expect_false() - grep(":", fixed$main) %>% - length() %>% - expect_identical(1L) - expect_identical(is.na(fixed$description), is.na(fixed$fingerprint.y)) - fixed$description %>% - is.na() %>% - all() %>% - expect_true() - random <- param@Parameter %>% - semi_join( - param@Parameter %>% - semi_join(tibble(description = "Random effect BLUP"), by = "description"), - by = c("parent" = "fingerprint") - ) %>% - select("fingerprint", main = "description") %>% - left_join( - param@Parameter %>% - rename(finger = "fingerprint", level = "description"), - by = c("fingerprint" = "parent") - ) %>% - left_join( + select("fingerprint", Main = "description") %>% + left_join( + param@Parameter %>% + rename(finger = "fingerprint", level = "description"), + by = c("fingerprint" = "parent") + ) %>% + left_join( + param@Parameter %>% + select("parent", 
Level2 = "description"), + by = c("finger" = "parent") + ) + expect_false(any(is.na(random$level))) + expect_true(all(is.na(random$level2))) + expect_identical( + dataset$E %>% + unique() %>% + sort() %>% + as.character(), + random$level + ) + expect_identical( + param@ParameterEstimate %>% + inner_join( + random, + by = c("parameter" = "finger") + ) %>% + nrow(), + nrow(random) + ) + } +) + +test_that( + paste( + "n2kInla with numeric-numeric interaction and two random effects of which", + "one replicated" + ), + { + dataset <- test_data() + this_analysis_date <- as.POSIXct("2015-01-01 04:05:06.12", tz = "UTC") + this_result_datasource_id <- sha1(sample(letters)) + this_scheme_id <- sha1(sample(letters)) + this_species_group_id <- sha1(sample(letters)) + this_location_group_id <- sha1(sample(letters)) + this_seed <- 4L + this_model_type <- "inla nbinomial: A * B + E" + this_first_imported_year <- 1990L + this_last_imported_year <- 2015L + this_last_analysed_year <- 2015L + this_parent <- "abcdef" + this_duration <- this_last_imported_year - this_first_imported_year + 1 + analysis <- n2k_inla( + formula = + "Count ~ C * D + f(E, model = \"rw1\", replicate = as.integer(A))", + result_datasource_id = this_result_datasource_id, + scheme_id = this_scheme_id, species_group_id = this_species_group_id, + location_group_id = this_location_group_id, family = "nbinomial", + model_type = this_model_type, + first_imported_year = this_first_imported_year, + last_imported_year = this_last_imported_year, + last_analysed_year = this_last_analysed_year, + analysis_date = this_analysis_date, seed = this_seed, data = dataset, + parent = this_parent, duration = this_duration + ) + analysis <- fit_model(analysis) + expect_message( + param <- get_model_parameter(analysis, verbose = TRUE), + "reading model parameters: fixed effects" + ) + expect_is(param, "n2kParameter") + expect_identical( param@Parameter %>% - select("parent", level2 = "description", finger2 = "fingerprint"), - by = c("finger" = "parent") + semi_join( + tibble(description = "Random effect variance"), by = "description" + ) %>% + inner_join(param@Parameter, by = c("fingerprint" = "parent")) %>% + nrow(), + 1L ) - expect_false(any(is.na(random$level))) - expect_equal( - random %>% - group_by(.data$main, .data$level) %>% - summarise( - n = n(), missing = mean(is.na(.data$level2)), .groups = "drop_last" + fixed <- param@Parameter %>% + semi_join( + param@Parameter %>% + semi_join(tibble(description = "Fixed effect"), by = "description"), + by = c("parent" = "fingerprint") ) %>% - summarise( - n1 = n(), n2 = mean(.data$n), missing = mean(.data$missing), - .groups = "drop" - ), - tibble(main = "E", n1 = 3L, n2 = 10, missing = 0) - ) - expect_identical( - param@ParameterEstimate %>% - inner_join( - random %>% - mutate( - finger2 = ifelse(is.na(.data$finger2), .data$finger, .data$finger2) + select("fingerprint", main = "description") %>% + left_join(param@Parameter, by = c("fingerprint" = "parent")) + expect_identical(nrow(fixed), 4L) + grepl(":", fixed$description) %>% + any() %>% + expect_false() + grep(":", fixed$main) %>% + length() %>% + expect_identical(1L) + expect_identical(is.na(fixed$description), is.na(fixed$fingerprint.y)) + fixed$description %>% + is.na() %>% + all() %>% + expect_true() + random <- param@Parameter %>% + semi_join( + param@Parameter %>% + semi_join( + tibble(description = "Random effect BLUP"), by = "description" ), - by = c("parameter" = "finger2") + by = c("parent" = "fingerprint") + ) %>% + select("fingerprint", main = 
"description") %>% + left_join( + param@Parameter %>% + rename(finger = "fingerprint", level = "description"), + by = c("fingerprint" = "parent") ) %>% - nrow(), - nrow(random) - ) -}) + left_join( + param@Parameter %>% + select("parent", level2 = "description", finger2 = "fingerprint"), + by = c("finger" = "parent") + ) + expect_false(any(is.na(random$level))) + expect_equal( + random %>% + group_by(.data$main, .data$level) %>% + summarise( + n = n(), missing = mean(is.na(.data$level2)), .groups = "drop_last" + ) %>% + summarise( + n1 = n(), n2 = mean(.data$n), missing = mean(.data$missing), + .groups = "drop" + ), + tibble(main = "E", n1 = 3L, n2 = 10, missing = 0) + ) + expect_identical( + param@ParameterEstimate %>% + inner_join( + random %>% + mutate( + finger2 = ifelse( + is.na(.data$finger2), .data$finger, .data$finger2 + ) + ), + by = c("parameter" = "finger2") + ) %>% + nrow(), + nrow(random) + ) + } +) -test_that("imputation and aggregation", { - set.seed(20191213) - this_result_datasource_id <- sha1(letters) - this_scheme_id <- sha1(letters) - this_species_group_id <- sha1(letters) - this_location_group_id <- sha1(letters) - this_analysis_date <- Sys.time() - this_model_type <- "inla poisson: A * (B + C) + C:D" - this_first_imported_year <- 1990L - this_last_imported_year <- 2015L - this_last_analysed_year <- 2014L - this_duration <- 1L - dataset <- test_data(missing = 0.2) - base <- tempfile("imputation") - dir.create(base) - project <- "imputation" +test_that( + "imputation and aggregation", + { + set.seed(20191213) + this_result_datasource_id <- sha1(letters) + this_scheme_id <- sha1(letters) + this_species_group_id <- sha1(letters) + this_location_group_id <- sha1(letters) + this_analysis_date <- Sys.time() + this_model_type <- "inla poisson: A * (B + C) + C:D" + this_first_imported_year <- 1990L + this_last_imported_year <- 2015L + this_last_analysed_year <- 2014L + this_duration <- 1L + dataset <- test_data(missing = 0.2) + base <- tempfile("imputation") + dir.create(base) + project <- "imputation" + + imputation <- n2k_inla( + data = dataset, scheme_id = this_scheme_id, + result_datasource_id = this_result_datasource_id, + species_group_id = this_species_group_id, + location_group_id = this_location_group_id, model_type = this_model_type, + first_imported_year = this_first_imported_year, imputation_size = 3, + last_imported_year = this_last_imported_year, family = "poisson", + last_analyses_year = this_last_analysed_year, duration = this_duration, + formula = "Count ~ A + f(E, model = \"iid\")", analysis_date = Sys.time(), + ) + aggregation <- n2k_aggregate( + scheme_id = this_scheme_id, + result_datasource_id = this_result_datasource_id, formula = "~ A + B", + species_group_id = this_species_group_id, + location_group_id = this_location_group_id, model_type = this_model_type, + first_imported_year = this_first_imported_year, + last_imported_year = this_last_imported_year, fun = sum, + last_analyses_year = this_last_analysed_year, duration = this_duration, + parent = get_file_fingerprint(imputation), analysis_date = Sys.time() + ) + expect_is(result <- get_model_parameter(imputation), "n2kParameter") + expect_equal(nrow(result@Parameter), 0L) + expect_is(result <- get_model_parameter(aggregation), "n2kParameter") + expect_equal(nrow(result@Parameter), 0L) - imputation <- n2k_inla( - data = dataset, scheme_id = this_scheme_id, - result_datasource_id = this_result_datasource_id, - species_group_id = this_species_group_id, - location_group_id = this_location_group_id, 
model_type = this_model_type, - first_imported_year = this_first_imported_year, imputation_size = 3, - last_imported_year = this_last_imported_year, family = "poisson", - last_analyses_year = this_last_analysed_year, duration = this_duration, - formula = "Count ~ A + f(E, model = \"iid\")", analysis_date = Sys.time(), - ) - aggregation <- n2k_aggregate( - scheme_id = this_scheme_id, - result_datasource_id = this_result_datasource_id, formula = "~ A + B", - species_group_id = this_species_group_id, - location_group_id = this_location_group_id, model_type = this_model_type, - first_imported_year = this_first_imported_year, analysis_date = Sys.time(), - last_imported_year = this_last_imported_year, fun = sum, - last_analyses_year = this_last_analysed_year, duration = this_duration, - parent = get_file_fingerprint(imputation) - ) - expect_is(result <- get_model_parameter(imputation), "n2kParameter") - expect_equal(nrow(result@Parameter), 0L) - expect_is(result <- get_model_parameter(aggregation), "n2kParameter") - expect_equal(nrow(result@Parameter), 0L) + suppressWarnings({ + imputation <- fit_model(imputation, parallel_configs = FALSE) + }) + store_model(imputation, base = base, project = project) + aggregation <- fit_model(aggregation, base = base, project = project) + expect_is(result <- get_model_parameter(imputation), "n2kParameter") + expect_equal(nrow(result@Parameter), 1956L) + expect_is(result <- get_model_parameter(aggregation), "n2kParameter") + expect_equal(nrow(result@Parameter), 14L) - suppressWarnings({ - imputation <- fit_model(imputation, parallel_configs = FALSE) - }) - store_model(imputation, base = base, project = project) - aggregation <- fit_model(aggregation, base = base, project = project) - expect_is(result <- get_model_parameter(imputation), "n2kParameter") - expect_equal(nrow(result@Parameter), 1956L) - expect_is(result <- get_model_parameter(aggregation), "n2kParameter") - expect_equal(nrow(result@Parameter), 14L) + store_model(aggregation, base = base, project = project) + extractor <- function(model) { + model$summary.fixed[, c("mean", "sd")] + } + mi <- n2k_model_imputed( + scheme_id = this_scheme_id, model_args = list(family = "poisson"), + result_datasource_id = this_result_datasource_id, model_fun = INLA::inla, + species_group_id = this_species_group_id, extractor = extractor, + location_group_id = this_location_group_id, model_type = this_model_type, + first_imported_year = this_first_imported_year, + analysis_date = Sys.time(), last_imported_year = this_last_imported_year, + formula = "~ A", last_analyses_year = this_last_analysed_year, + duration = this_duration, parent = get_file_fingerprint(aggregation) + ) + expect_is(result <- get_model_parameter(mi), "n2kParameter") + expect_equal(nrow(result@Parameter), 0L) - store_model(aggregation, base = base, project = project) - extractor <- function(model) { - model$summary.fixed[, c("mean", "sd")] + mi <- fit_model(mi, base = base, project = project) + expect_is(result <- get_model_parameter(mi), "n2kParameter") + expect_equal(nrow(result@Parameter), 4L) } - mi <- n2k_model_imputed( - scheme_id = this_scheme_id, model_args = list(family = "poisson"), - result_datasource_id = this_result_datasource_id, model_fun = INLA::inla, - species_group_id = this_species_group_id, extractor = extractor, - location_group_id = this_location_group_id, model_type = this_model_type, - first_imported_year = this_first_imported_year, analysis_date = Sys.time(), - last_imported_year = this_last_imported_year, formula = "~ A", - 
last_analyses_year = this_last_analysed_year, duration = this_duration, - parent = get_file_fingerprint(aggregation) - ) - expect_is(result <- get_model_parameter(mi), "n2kParameter") - expect_equal(nrow(result@Parameter), 0L) - - mi <- fit_model(mi, base = base, project = project) - expect_is(result <- get_model_parameter(mi), "n2kParameter") - expect_equal(nrow(result@Parameter), 4L) -}) +) From ca38402ae1db64d1cedf404605cf64993e1ad020 Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Tue, 1 Apr 2025 16:43:26 +0200 Subject: [PATCH 11/16] =?UTF-8?q?=F0=9F=93=9D=20Update=20README?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 185e2efc..56f9bccd 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ [![Project Status: Active – The project has reached a stable, usable state and is being actively developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active) -[![License](http://img.shields.io/badge/license-GPL--3-blue.svg?style=flat)](http://www.gnu.org/licenses/gpl-3.0.html) +[![License](http://img.shields.io/badge/license-GPL--3-blue.svg?style=flat) [![Release](https://img.shields.io/github/release/qubyte/rubidium.svg)](https://github.com/inbo/n2kanalysis/releases) -[![codecov](https://codecov.io/gh/inbo/n2kanalysis/branch/master/graph/badge.svg)](https://app.codecov.io/gh/inbo/n2kanalysis) +[![codecov](https://codecov.io/gh/inbo/n2kanalysis/branch/main/graph/badge.svg)](https://app.codecov.io/gh/inbo/n2kanalysis) # The `n2kanalysis` package From 3462507d0fdfae39e1ce271254e8b237437838ea Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Tue, 1 Apr 2025 16:52:37 +0200 Subject: [PATCH 12/16] =?UTF-8?q?=F0=9F=91=B7=20Use=20lateste=20checklist?= =?UTF-8?q?=20GHA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/check_on_branch.yml | 2 +- .github/workflows/check_on_main.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/check_on_branch.yml b/.github/workflows/check_on_branch.yml index 182dd948..70812d8e 100644 --- a/.github/workflows/check_on_branch.yml +++ b/.github/workflows/check_on_branch.yml @@ -20,4 +20,4 @@ jobs: permissions: contents: read steps: - - uses: inbo/actions/check_pkg@checklist-0.4.1 + - uses: inbo/actions/check_pkg@main diff --git a/.github/workflows/check_on_main.yml b/.github/workflows/check_on_main.yml index 3ffc91a0..3f8b37b3 100644 --- a/.github/workflows/check_on_main.yml +++ b/.github/workflows/check_on_main.yml @@ -20,4 +20,4 @@ jobs: AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }} AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} steps: - - uses: inbo/actions/check_pkg@checklist-0.4.1 + - uses: inbo/actions/check_pkg@main From d0809edec1fdd37345d7f3f7d6cfd6ecbfc895f2 Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Thu, 18 Sep 2025 11:36:05 +0200 Subject: [PATCH 13/16] =?UTF-8?q?=F0=9F=8E=A8=20Apply=20air=20format=20.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- NAMESPACE | 2 +- R/combine.R | 13 +- R/combine_result.R | 4 +- R/fit_model_character.R | 55 +++- R/fit_model_n2k_aggregate.R | 7 +- R/fit_model_n2k_composite.R | 26 +- R/fit_model_n2k_hurdle_imputed.R | 88 +++++-- R/fit_model_n2k_inla.R | 102 +++++--- R/fit_model_n2k_manifest.R | 41 ++- R/fit_model_n2k_model_imputed.R | 19 +- 
R/fit_model_n2k_spde.R | 91 ++++--- R/fit_model_s3_object.R | 7 +- R/get_analysis_version.R | 12 +- R/get_anomaly_n2k_inla.R | 63 +++-- R/get_datafield_id.R | 30 ++- R/get_model_parameter_n2k_aggregated.R | 17 +- R/get_model_parameter_n2k_composite.R | 12 +- R/get_model_parameter_n2k_hurdle_imputed.R | 20 +- R/get_model_parameter_n2k_inla.R | 102 +++++--- R/get_model_parameter_n2k_model_imputed.R | 13 +- R/get_result_character.R | 17 +- R/get_result_n2k_inla.R | 38 ++- R/get_result_n2kmanifest.R | 37 ++- R/inla_inverse.R | 4 +- R/make_a.R | 6 +- R/manifest_yaml_to_bash.R | 127 ++++++--- R/moving_average.R | 8 +- R/moving_difference.R | 10 +- R/moving_trend.R | 7 +- R/n2k_aggregate_class.R | 17 +- R/n2k_aggregated.R | 47 +++- R/n2k_analysis_metadata_class.R | 46 ++-- R/n2k_analysis_version_class.R | 19 +- R/n2k_composite.R | 101 +++++-- R/n2k_composite_class.R | 23 +- R/n2k_contrast_class.R | 58 +++-- R/n2k_hurdle_imputed.R | 61 +++-- R/n2k_hurdle_imputed_class.R | 20 +- R/n2k_import.R | 26 +- R/n2k_import_class.R | 9 +- R/n2k_inla.R | 163 +++++++++--- R/n2k_inla_class.R | 62 +++-- R/n2k_inla_comparison.R | 52 +++- R/n2k_inla_comparison_class.R | 13 +- R/n2k_manifest.R | 6 +- R/n2k_manifest_class.R | 22 +- R/n2k_model_class.R | 1 + R/n2k_model_imputed.R | 80 ++++-- R/n2k_model_imputed_class.R | 33 ++- R/n2k_parameter_class.R | 33 ++- R/n2k_spde.R | 182 +++++++++---- R/parent_status.R | 7 +- R/read_manifest.R | 54 +++- R/read_result.R | 17 +- R/result_estimate_n2k_result.R | 17 +- R/select_factor_count_strictly_positive.R | 9 +- R/select_factor_treshold.R | 3 +- R/select_observed_range.R | 3 +- R/session_package.R | 13 +- R/sha1.R | 9 +- R/spde.R | 17 +- R/spde2matern.R | 9 +- R/spde2mesh.R | 15 +- R/spde_class.R | 18 +- R/status.R | 67 +++-- R/store_manifest.R | 31 ++- R/store_manifest_yaml.R | 53 +++- R/store_model.R | 37 ++- R/union.R | 3 +- R/write_s3_fun.R | 15 +- ...ive.Rd => select_factor_count_non_zero.Rd} | 12 +- tests/testthat/helper_test_data.R | 2 +- tests/testthat/test_aaa_get_datafield_id.R | 15 +- tests/testthat/test_aaa_n2k_manifest.R | 43 ++- ...aa_select_factor_count_strictly_positive.R | 18 +- .../test_aaa_select_factor_treshold.R | 3 +- tests/testthat/test_aaa_validobject.R | 9 +- tests/testthat/test_aba_n2k_import.R | 21 +- tests/testthat/test_aba_n2k_inla.R | 197 +++++++++----- tests/testthat/test_aba_n2k_inla_accessor.R | 16 +- tests/testthat/test_aba_n2k_inla_validation.R | 5 +- tests/testthat/test_abb_n2k_spde.R | 16 +- tests/testthat/test_aca_n2k_aggregate.R | 24 +- tests/testthat/test_aca_n2k_anomaly.R | 90 +++++-- tests/testthat/test_aca_n2k_model_accessor.R | 11 +- .../testthat/test_baa_get_file_fingerprint.R | 4 +- tests/testthat/test_baa_store_manifest.R | 8 +- tests/testthat/test_bba_read_manifest.R | 28 +- tests/testthat/test_bba_store_model.R | 24 +- tests/testthat/test_bbb_store_manifest_yaml.R | 28 +- tests/testthat/test_caa_fit_model.R | 243 +++++++++++------ tests/testthat/test_cba_fit_model_manifest.R | 96 +++++-- tests/testthat/test_cba_model_impute.R | 106 +++++--- tests/testthat/test_daa_get_model_parameter.R | 246 +++++++++++------- .../test_daa_get_status_fingerprint.R | 30 ++- tests/testthat/test_eaa_get_result.R | 128 ++++++--- 96 files changed, 2709 insertions(+), 1163 deletions(-) rename man/{select_factor_count_strictly_positive.Rd => select_factor_count_non_zero.Rd} (84%) diff --git a/NAMESPACE b/NAMESPACE index 40a0f55b..bae9efcc 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -13,7 +13,7 @@ export(moving_average) export(moving_difference) 
export(moving_trend) export(order_manifest) -export(select_factor_count_strictly_positive) +export(select_factor_count_non_zero) export(select_factor_threshold) export(select_observed_range) export(sha1) diff --git a/R/combine.R b/R/combine.R index d887f52f..1ab96c5b 100644 --- a/R/combine.R +++ b/R/combine.R @@ -46,15 +46,19 @@ setMethod( )) r_package <- r_package[order(r_package$description, r_package$version), ] analysis_version_r_package <- lapply( - dots, slot, "AnalysisVersionRPackage" + dots, + slot, + "AnalysisVersionRPackage" ) %>% bind_rows() %>% distinct() analysis_version_r_package <- analysis_version_r_package %>% arrange(.data$analysis_version, .data$r_package) new( - "n2kAnalysisVersion", AnalysisVersion = analysis_version, - RPackage = r_package, AnalysisVersionRPackage = analysis_version_r_package + "n2kAnalysisVersion", + AnalysisVersion = analysis_version, + RPackage = r_package, + AnalysisVersionRPackage = analysis_version_r_package ) } ) @@ -75,7 +79,8 @@ setMethod( bind_rows() %>% distinct() new( - "n2kParameter", Parameter = parameter, + "n2kParameter", + Parameter = parameter, ParameterEstimate = parameter_estimate ) } diff --git a/R/combine_result.R b/R/combine_result.R index 841dedb0..ce66639e 100644 --- a/R/combine_result.R +++ b/R/combine_result.R @@ -31,7 +31,9 @@ setMethod( arrange(.data$fingerprint) analysis_version_r_package <- lapply( - dots, slot, name = "AnalysisVersionRPackage" + dots, + slot, + name = "AnalysisVersionRPackage" ) %>% do.call(what = rbind) %>% distinct() %>% diff --git a/R/fit_model_character.R b/R/fit_model_character.R index b9dba0ff..e9bbb1eb 100644 --- a/R/fit_model_character.R +++ b/R/fit_model_character.R @@ -21,7 +21,13 @@ setMethod( f = "fit_model", signature = signature(x = "character"), definition = function( - x, base, project, status = c("new", "waiting"), verbose = TRUE, ..., bucket + x, + base, + project, + status = c("new", "waiting"), + verbose = TRUE, + ..., + bucket ) { assert_that(is.string(x)) display(verbose, x) @@ -50,18 +56,24 @@ setMethod( } dots <- list(...) if ( - !has_name(dots, "local") || is.null(dots$local) || + !has_name(dots, "local") || + is.null(dots$local) || !inherits(base, "s3_bucket") ) { analysis <- read_model(hash, base = base, project = project) display(verbose, paste(status(analysis), "-> "), FALSE) analysis <- fit_model( - x = analysis, status = status, base = base, project = project, ... + x = analysis, + status = status, + base = base, + project = project, + ... ) display(verbose, status(analysis)) store_model(analysis, base = base, project = project) result <- data.frame( - fingerprint = get_file_fingerprint(analysis), status = status(analysis) + fingerprint = get_file_fingerprint(analysis), + status = status(analysis) ) rm(analysis) gc(verbose = FALSE) @@ -69,18 +81,25 @@ setMethod( } dots <- list(...) 
to_do <- object_status( - base = base, project = project, status = status, hash = x + base = base, + project = project, + status = status, + hash = x ) if (length(to_do) == 0) { display(verbose, "skipping") gc(verbose = FALSE) result <- data.frame( - fingerprint = hash, status = "converged" + fingerprint = hash, + status = "converged" ) return(invisible(result)) } download_model( - hash = hash, base = base, local = dots$local, project = project, + hash = hash, + base = base, + local = dots$local, + project = project, verbose = verbose ) analysis <- read_model(hash, base = dots$local, project = project) @@ -88,21 +107,33 @@ setMethod( slot(analysis, "AnalysisRelation") |> mutate( downloaded = map( - .data$parent_analysis, download_model, base = base, - local = dots$local, project = project, verbose = verbose + .data$parent_analysis, + download_model, + base = base, + local = dots$local, + project = project, + verbose = verbose ) ) analysis <- fit_model( - x = analysis, status = status, base = dots$local, project = project, ... + x = analysis, + status = status, + base = dots$local, + project = project, + ... ) display(verbose, status(analysis)) store_model(analysis, base = dots$local, project = project) download_model( - hash = hash, local = base, base = dots$local, project = project, + hash = hash, + local = base, + base = dots$local, + project = project, verbose = verbose ) result <- data.frame( - fingerprint = get_file_fingerprint(analysis), status = status(analysis) + fingerprint = get_file_fingerprint(analysis), + status = status(analysis) ) rm(analysis) gc(verbose = FALSE) diff --git a/R/fit_model_n2k_aggregate.R b/R/fit_model_n2k_aggregate.R index d592d4dc..88e4098d 100644 --- a/R/fit_model_n2k_aggregate.R +++ b/R/fit_model_n2k_aggregate.R @@ -43,10 +43,13 @@ setMethod( setNames("Data") |> c( list( - Class = "rawImputed", Response = "Count", Minimum = "", + Class = "rawImputed", + Response = "Count", + Minimum = "", Imputation = parent@AggregatedImputed@Imputation, Extra = cbind( - parent@AggregatedImputed@Covariate[0, ], Count = integer(0) + parent@AggregatedImputed@Covariate[0, ], + Count = integer(0) ) ) ) |> diff --git a/R/fit_model_n2k_composite.R b/R/fit_model_n2k_composite.R index 1170cd11..67fdd5a8 100644 --- a/R/fit_model_n2k_composite.R +++ b/R/fit_model_n2k_composite.R @@ -25,14 +25,22 @@ setMethod( filter(!is.na(.data$estimate), !is.na(.data$variance)) |> group_by(.data$value) |> summarise( - estimate = mean(.data$estimate), se = sqrt(sum(.data$variance)) / n() + estimate = mean(.data$estimate), + se = sqrt(sum(.data$variance)) / n() ) |> transmute( - .data$value, .data$estimate, - lower_confidence_limit = - qnorm(0.025, mean = .data$estimate, sd = .data$se), - upper_confidence_limit = - qnorm(0.975, mean = .data$estimate, sd = .data$se) + .data$value, + .data$estimate, + lower_confidence_limit = qnorm( + 0.025, + mean = .data$estimate, + sd = .data$se + ), + upper_confidence_limit = qnorm( + 0.975, + mean = .data$estimate, + sd = .data$se + ) ) |> as.data.frame() -> x@Index status(x) <- "converged" @@ -52,12 +60,14 @@ setMethod( ) if (inherits(model, "try-error")) { parent_status[ - parent_status$parent_analysis == this_parent, "parent_status" + parent_status$parent_analysis == this_parent, + "parent_status" ] <- "error" next } parent_status[ - parent_status$parent_analysis == this_parent, "parent_status" + parent_status$parent_analysis == this_parent, + "parent_status" ] <- status(model) parent_status[ parent_status$parent_analysis == this_parent, diff --git 
a/R/fit_model_n2k_hurdle_imputed.R b/R/fit_model_n2k_hurdle_imputed.R index be37d213..82efa589 100644 --- a/R/fit_model_n2k_hurdle_imputed.R +++ b/R/fit_model_n2k_hurdle_imputed.R @@ -8,7 +8,11 @@ setMethod( f = "fit_model", signature = signature(x = "n2kHurdleImputed"), definition = function( - x, base, project, status = c("new", "waiting"), ... + x, + base, + project, + status = c("new", "waiting"), + ... ) { validObject(x) assert_that(is.character(status), length(status) >= 1) @@ -20,7 +24,9 @@ setMethod( if (is.null(x@Presence) || status(x) %in% c("converged", "error")) { presence <- read_model( - x@AnalysisRelation$parent_analysis[1], base = base, project = project + x@AnalysisRelation$parent_analysis[1], + base = base, + project = project ) x@Presence <- presence@RawImputed x@AnalysisRelation$parentstatus_fingerprint[1] <- get_status_fingerprint( @@ -30,18 +36,28 @@ setMethod( rm(presence) gc() x@AnalysisMetadata$status <- ifelse( - all(x@AnalysisRelation$parentstatus == "converged"), "new", + all(x@AnalysisRelation$parentstatus == "converged"), + "new", ifelse( - any(!x@AnalysisRelation$parentstatus %in% - c("new", "waiting", "converged")), - "error", "waiting" + any( + !x@AnalysisRelation$parentstatus %in% + c("new", "waiting", "converged") + ), + "error", + "waiting" ) ) x@AnalysisMetadata$status_fingerprint <- sha1( list( - get_file_fingerprint(x), x@AnalysisMetadata$status, - x@AnalysisVersion$fingerprint, x@AnalysisVersion, x@RPackage, - x@AnalysisVersionRPackage, x@AnalysisRelation, x@Presence, x@Count, + get_file_fingerprint(x), + x@AnalysisMetadata$status, + x@AnalysisVersion$fingerprint, + x@AnalysisVersion, + x@RPackage, + x@AnalysisVersionRPackage, + x@AnalysisRelation, + x@Presence, + x@Count, x@Hurdle ), digits = 6L @@ -50,7 +66,9 @@ setMethod( } if (is.null(x@Count) || status(x) %in% c("converged", "error")) { count <- read_model( - x@AnalysisRelation$parent_analysis[2], base = base, project = project + x@AnalysisRelation$parent_analysis[2], + base = base, + project = project ) x@Count <- count@RawImputed x@AnalysisRelation$parentstatus_fingerprint[2] <- get_status_fingerprint( @@ -61,18 +79,28 @@ setMethod( gc() x@AnalysisMetadata$status <- ifelse( - all(x@AnalysisRelation$parentstatus == "converged"), "new", + all(x@AnalysisRelation$parentstatus == "converged"), + "new", ifelse( - any(!x@AnalysisRelation$parentstatus %in% - c("new", "waiting", "converged")), - "error", "waiting" + any( + !x@AnalysisRelation$parentstatus %in% + c("new", "waiting", "converged") + ), + "error", + "waiting" ) ) x@AnalysisMetadata$status_fingerprint <- sha1( list( - get_file_fingerprint(x), x@AnalysisMetadata$status, - x@AnalysisVersion$fingerprint, x@AnalysisVersion, x@RPackage, - x@AnalysisVersionRPackage, x@AnalysisRelation, x@Presence, x@Count, + get_file_fingerprint(x), + x@AnalysisMetadata$status, + x@AnalysisVersion$fingerprint, + x@AnalysisVersion, + x@RPackage, + x@AnalysisVersionRPackage, + x@AnalysisRelation, + x@Presence, + x@Count, x@Hurdle ), digits = 6L @@ -88,9 +116,16 @@ setMethod( if (inherits(result, "try-error")) { x@AnalysisMetadata$status_fingerprint <- sha1( list( - get_file_fingerprint(x), "error", x@AnalysisVersion$fingerprint, - x@AnalysisVersion, x@RPackage, x@AnalysisVersionRPackage, - x@AnalysisRelation, x@Presence, x@Count, NULL + get_file_fingerprint(x), + "error", + x@AnalysisVersion$fingerprint, + x@AnalysisVersion, + x@RPackage, + x@AnalysisVersionRPackage, + x@AnalysisRelation, + x@Presence, + x@Count, + NULL ), digits = 6L ) @@ -99,9 +134,16 @@ 
setMethod( } x@AnalysisMetadata$status_fingerprint <- sha1( list( - get_file_fingerprint(x), "converged", x@AnalysisVersion$fingerprint, - x@AnalysisVersion, x@RPackage, x@AnalysisVersionRPackage, - x@AnalysisRelation, x@Presence, x@Count, result + get_file_fingerprint(x), + "converged", + x@AnalysisVersion$fingerprint, + x@AnalysisVersion, + x@RPackage, + x@AnalysisVersionRPackage, + x@AnalysisRelation, + x@Presence, + x@Count, + result ), digits = 6L ) diff --git a/R/fit_model_n2k_inla.R b/R/fit_model_n2k_inla.R index 556ac0bd..80807999 100644 --- a/R/fit_model_n2k_inla.R +++ b/R/fit_model_n2k_inla.R @@ -9,8 +9,13 @@ setMethod( f = "fit_model", signature = signature(x = "n2kInla"), definition = function( - x, status = "new", ..., timeout = NULL, seed = get_seed(x), - num_threads = NULL, parallel_configs = TRUE + x, + status = "new", + ..., + timeout = NULL, + seed = get_seed(x), + num_threads = NULL, + parallel_configs = TRUE ) { assert_that( requireNamespace("INLA", quietly = TRUE), @@ -41,11 +46,17 @@ setMethod( response <- all.vars(fm)[attr(fm, "response")] if (mean(is.na(data[[response]])) < 0.10) { model <- direct_fit( - control = control, data = data, lc = lc, timeout = timeout + control = control, + data = data, + lc = lc, + timeout = timeout ) } else { model <- indirect_fit( - response = response, control = control, data = data, lc = lc, + response = response, + control = control, + data = data, + lc = lc, timeout = timeout ) } @@ -53,7 +64,9 @@ setMethod( # handle error in model fit if (inherits(model, "try-error")) { status(x) <- ifelse( - grepl("time limit", model), "time-out", "error" + grepl("time limit", model), + "time-out", + "error" ) return(x) } @@ -63,8 +76,12 @@ setMethod( } imputed <- try(impute( - model = model, n_imp = x@ImputationSize, minimum = x@Minimum, - seed = seed, num_threads = num_threads, extra = x@Extra, + model = model, + n_imp = x@ImputationSize, + minimum = x@Minimum, + seed = seed, + num_threads = num_threads, + extra = x@Extra, parallel_configs = parallel_configs )) if (inherits(imputed, "try-error")) { @@ -72,7 +89,10 @@ setMethod( } # return fitted model with imputations return(n2k_inla( - data = x, model_fit = model, status = "converged", raw_imputed = imputed + data = x, + model_fit = model, + status = "converged", + raw_imputed = imputed )) } ) @@ -102,13 +122,16 @@ model2lincomb <- function(lincomb) { direct_fit <- function(control, data, lc, timeout = NULL) { control$data <- data control$lincomb <- lc - try({ - if (!is.null(timeout)) { - assert_that(is.number(timeout), timeout > 0) - setTimeLimit(cpu = timeout, elapsed = timeout) - } - do.call(INLA::inla, control) - }, silent = TRUE) + try( + { + if (!is.null(timeout)) { + assert_that(is.number(timeout), timeout > 0) + setTimeLimit(cpu = timeout, elapsed = timeout) + } + do.call(INLA::inla, control) + }, + silent = TRUE + ) } #' @importFrom assertthat assert_that is.number @@ -117,24 +140,30 @@ indirect_fit <- function(control, data, lc, response, timeout = NULL) { compute <- control$control.compute control$data <- data[!is.na(data[[response]]), ] control$control.compute <- NULL - m0 <- try({ - if (!is.null(timeout)) { - assert_that(is.number(timeout), timeout > 0) - setTimeLimit(cpu = timeout, elapsed = timeout) - } - do.call(INLA::inla, control) - }, silent = TRUE) - if (inherits(m0, "try-error") && "control.family" %in% names(control)) { - # when model failed to fit, try again without family - old_control_family <- control$control.family - control$control.family <- NULL - m0 <- try({ + m0 
<- try( + { if (!is.null(timeout)) { assert_that(is.number(timeout), timeout > 0) setTimeLimit(cpu = timeout, elapsed = timeout) } do.call(INLA::inla, control) - }, silent = TRUE) + }, + silent = TRUE + ) + if (inherits(m0, "try-error") && "control.family" %in% names(control)) { + # when model failed to fit, try again without family + old_control_family <- control$control.family + control$control.family <- NULL + m0 <- try( + { + if (!is.null(timeout)) { + assert_that(is.number(timeout), timeout > 0) + setTimeLimit(cpu = timeout, elapsed = timeout) + } + do.call(INLA::inla, control) + }, + silent = TRUE + ) control$control.family <- old_control_family } if (inherits(m0, "try-error")) { @@ -145,11 +174,14 @@ indirect_fit <- function(control, data, lc, response, timeout = NULL) { control$lincomb <- lc control$control.update <- list(result = m0) control$control.compute <- compute - try({ - if (!is.null(timeout)) { - assert_that(is.number(timeout), timeout > 0) - setTimeLimit(cpu = timeout, elapsed = timeout) - } - do.call(INLA::inla, control) - }, silent = TRUE) + try( + { + if (!is.null(timeout)) { + assert_that(is.number(timeout), timeout > 0) + setTimeLimit(cpu = timeout, elapsed = timeout) + } + do.call(INLA::inla, control) + }, + silent = TRUE + ) } diff --git a/R/fit_model_n2k_manifest.R b/R/fit_model_n2k_manifest.R index c291f3e9..d374f933 100644 --- a/R/fit_model_n2k_manifest.R +++ b/R/fit_model_n2k_manifest.R @@ -12,11 +12,19 @@ setMethod( f = "fit_model", signature = signature(x = "n2kManifest"), definition = function( - x, base, project, status = c("new", "waiting"), verbose = TRUE, ..., + x, + base, + project, + status = c("new", "waiting"), + verbose = TRUE, + ..., local = NULL ) { assert_that( - is.string(project), noNA(project), is.character(status), noNA(status), + is.string(project), + noNA(project), + is.character(status), + noNA(status), length(status) >= 1 ) to_do <- order_manifest(x) @@ -31,7 +39,10 @@ setMethod( } else { data.frame(fingerprint = character(0), status = character(0)) |> write.table( - file = cache_file, sep = "\t", row.names = FALSE, quote = FALSE + file = cache_file, + sep = "\t", + row.names = FALSE, + quote = FALSE ) } start_time <- Sys.time() @@ -39,23 +50,35 @@ setMethod( display( verbose = verbose, message = sprintf( - "Processing %i from %i (%.2f%%) %s ETA %s", i, length(to_do), + "Processing %i from %i (%.2f%%) %s ETA %s", + i, + length(to_do), 100 * (i - 1) / length(to_do), format(Sys.time(), "%Y-%m-%d %H:%M:%S"), format( - start_time + (Sys.time() - start_time) * length(to_do) / (i - 1), + start_time + (Sys.time() - start_time) * length(to_do) / (i - 1), "%d %H:%M" ) ) ) result <- try(fit_model( - x = to_do[i], base = base, project = project, status = status, - verbose = verbose, ..., local = local + x = to_do[i], + base = base, + project = project, + status = status, + verbose = verbose, + ..., + local = local )) if (!inherits(result, "try-error")) { write.table( - result, file = cache_file, append = TRUE, sep = "\t", - row.names = FALSE, quote = FALSE, col.names = FALSE + result, + file = cache_file, + append = TRUE, + sep = "\t", + row.names = FALSE, + quote = FALSE, + col.names = FALSE ) } } diff --git a/R/fit_model_n2k_model_imputed.R b/R/fit_model_n2k_model_imputed.R index 48cbc00b..aa81d76c 100644 --- a/R/fit_model_n2k_model_imputed.R +++ b/R/fit_model_n2k_model_imputed.R @@ -28,7 +28,9 @@ setMethod( x@AnalysisRelation$parentstatus_fingerprint <- parent[[1]]@AnalysisMetadata$status_fingerprint status(x) <- ifelse( - parent_status %in% 
c("new", "waiting"), "waiting", "error" + parent_status %in% c("new", "waiting"), + "waiting", + "error" ) return(x) } @@ -39,7 +41,10 @@ setMethod( status(x) <- "new" } stopifnot(all(vapply( - x@Package, FUN = require, FUN.VALUE = logical(1), quietly = TRUE, + x@Package, + FUN = require, + FUN.VALUE = logical(1), + quietly = TRUE, character.only = TRUE ))) model_args <- x@ModelArgs @@ -48,10 +53,14 @@ setMethod( } model <- try( model_impute( - object = x@AggregatedImputed, model_fun = x@Function, + object = x@AggregatedImputed, + model_fun = x@Function, rhs = gsub("~", "", x@AnalysisMetadata$formula), - model_args = model_args, extractor = x@Extractor, mutate = x@Mutate, - extractor_args = x@ExtractorArgs, filter = filter2function(x@Filter) + model_args = model_args, + extractor = x@Extractor, + mutate = x@Mutate, + extractor_args = x@ExtractorArgs, + filter = filter2function(x@Filter) ) ) if ("try-error" %in% class(model)) { diff --git a/R/fit_model_n2k_spde.R b/R/fit_model_n2k_spde.R index 2c0816aa..aaec4565 100644 --- a/R/fit_model_n2k_spde.R +++ b/R/fit_model_n2k_spde.R @@ -9,8 +9,13 @@ setMethod( f = "fit_model", signature = signature(x = "n2kSpde"), definition = function( - x, status = "new", ..., timeout = NULL, seed = get_seed(x), - num_threads = NULL, parallel_configs = TRUE + x, + status = "new", + ..., + timeout = NULL, + seed = get_seed(x), + num_threads = NULL, + parallel_configs = TRUE ) { assert_that( requireNamespace("INLA", quietly = TRUE), @@ -41,7 +46,9 @@ setMethod( tag = "observed", effects = list( data[ - !is.na(data[[response]]), colnames(data) != response, drop = FALSE + !is.na(data[[response]]), + colnames(data) != response, + drop = FALSE ], index ) @@ -66,53 +73,68 @@ setMethod( # directly fit model when less than 10% missing data control$data <- INLA::inla.stack.data(stack_total, spde = spde) control$control.predictor <- list( - A = INLA::inla.stack.A(stack_total), link = 1 + A = INLA::inla.stack.A(stack_total), + link = 1 ) control$lincomb <- lc - model <- try({ - if (!is.null(timeout)) { - assert_that(is.number(timeout), timeout > 0) - setTimeLimit(cpu = timeout, elapsed = timeout) - } - do.call(INLA::inla, control) - }, silent = TRUE) + model <- try( + { + if (!is.null(timeout)) { + assert_that(is.number(timeout), timeout > 0) + setTimeLimit(cpu = timeout, elapsed = timeout) + } + do.call(INLA::inla, control) + }, + silent = TRUE + ) } else { # first fit model without missing data control$data <- INLA::inla.stack.data(stack_observed, spde = spde) control$control.predictor <- list(A = INLA::inla.stack.A(stack_observed)) - m0 <- try({ - if (!is.null(timeout)) { - assert_that(is.number(timeout), timeout > 0) - setTimeLimit(cpu = timeout, elapsed = timeout) - } - do.call(INLA::inla, control) - }, silent = TRUE) + m0 <- try( + { + if (!is.null(timeout)) { + assert_that(is.number(timeout), timeout > 0) + setTimeLimit(cpu = timeout, elapsed = timeout) + } + do.call(INLA::inla, control) + }, + silent = TRUE + ) if (inherits(m0, "try-error")) { status(x) <- ifelse( - grepl("time limit", m0), "time-out", "error" + grepl("time limit", m0), + "time-out", + "error" ) return(x) } # then refit with missing data control$data <- INLA::inla.stack.data(stack_total, spde = spde) control$control.predictor <- list( - A = INLA::inla.stack.A(stack_total), link = 1 + A = INLA::inla.stack.A(stack_total), + link = 1 ) control$lincomb <- lc control$control.update <- list(result = m0) - model <- try({ - if (!is.null(timeout)) { - assert_that(is.number(timeout), timeout > 0) - 
setTimeLimit(cpu = timeout, elapsed = timeout) - } - do.call(INLA::inla, control) - }, silent = TRUE) + model <- try( + { + if (!is.null(timeout)) { + assert_that(is.number(timeout), timeout > 0) + setTimeLimit(cpu = timeout, elapsed = timeout) + } + do.call(INLA::inla, control) + }, + silent = TRUE + ) } # handle error in model fit if (inherits(model, "try-error")) { status(x) <- ifelse( - grepl("time limit", model), "time-out", "error" + grepl("time limit", model), + "time-out", + "error" ) return(x) } @@ -122,8 +144,12 @@ setMethod( } imputed <- try(impute( - model = model, n_imp = x@ImputationSize, minimum = x@Minimum, - seed = seed, num_threads = num_threads, extra = x@Extra, + model = model, + n_imp = x@ImputationSize, + minimum = x@Minimum, + seed = seed, + num_threads = num_threads, + extra = x@Extra, parallel_configs = parallel_configs )) if (inherits(imputed, "try-error")) { @@ -131,7 +157,10 @@ setMethod( } # return fitted model with imputations return(n2k_spde( - data = x, model_fit = model, status = "converged", raw_imputed = imputed + data = x, + model_fit = model, + status = "converged", + raw_imputed = imputed )) } ) diff --git a/R/fit_model_s3_object.R b/R/fit_model_s3_object.R index 537aca1b..f8964b22 100644 --- a/R/fit_model_s3_object.R +++ b/R/fit_model_s3_object.R @@ -37,8 +37,11 @@ setMethod( analysis <- s3readRDS(object = x) display(dots$verbose, paste(status(analysis), "-> "), FALSE) analysis_fitted <- fit_model( - x = analysis, status = status, base = dots$base, - project = dots$project, ... + x = analysis, + status = status, + base = dots$base, + project = dots$project, + ... ) display(dots$verbose, status(analysis_fitted)) store_model(analysis_fitted, base = dots$base, project = dots$project) diff --git a/R/get_analysis_version.R b/R/get_analysis_version.R index 4ef396b3..f73eaf23 100644 --- a/R/get_analysis_version.R +++ b/R/get_analysis_version.R @@ -22,7 +22,8 @@ setMethod( signature = signature(version = "data.frame"), definition = function(version) { check_dataframe_variable( - df = version, name = "version", + df = version, + name = "version", variable = c("description", "version", "origin", "fingerprint") ) assert_that( @@ -30,15 +31,18 @@ setMethod( msg = "Missing analysis_version attribute" ) analysis_version <- data.frame( - fingerprint = attr(version, "analysis_version"), stringsAsFactors = FALSE + fingerprint = attr(version, "analysis_version"), + stringsAsFactors = FALSE ) version <- version[order(version$description, version$version), ] new( "n2kAnalysisVersion", - AnalysisVersion = analysis_version, RPackage = version, + AnalysisVersion = analysis_version, + RPackage = version, AnalysisVersionRPackage = data.frame( analysis_version = analysis_version$fingerprint, - r_package = version$fingerprint, stringsAsFactors = FALSE + r_package = version$fingerprint, + stringsAsFactors = FALSE ) ) } diff --git a/R/get_anomaly_n2k_inla.R b/R/get_anomaly_n2k_inla.R index bcb46b70..c2e0bbdb 100644 --- a/R/get_anomaly_n2k_inla.R +++ b/R/get_anomaly_n2k_inla.R @@ -24,13 +24,22 @@ setMethod( f = "get_anomaly", signature = signature(analysis = "n2kInla"), definition = function( - analysis, n = 20, expected_ratio = 5, expected_absent = 5, - random_threshold = 1.05, verbose = TRUE, ... + analysis, + n = 20, + expected_ratio = 5, + expected_absent = 5, + random_threshold = 1.05, + verbose = TRUE, + ... 
) { assert_that( - is.count(n), is.number(expected_ratio), expected_ratio > 1, - is.number(expected_absent), expected_absent > 1, - is.number(random_threshold), random_threshold > 1 + is.count(n), + is.number(expected_ratio), + expected_ratio > 1, + is.number(expected_absent), + expected_absent > 1, + is.number(random_threshold), + random_threshold > 1 ) if (!is.null(analysis@Model)) { assert_that( @@ -38,11 +47,16 @@ setMethod( msg = "multiple response not handled yet" ) assert_that( - analysis@Model$.args$family %in% c( - "binomial", "nbinomial", "poisson", "zeroinflatednbinomial0", - "zeroinflatednbinomial1", "zeroinflatedpoisson0", - "zeroinflatedpoisson1" - ), + analysis@Model$.args$family %in% + c( + "binomial", + "nbinomial", + "poisson", + "zeroinflatednbinomial0", + "zeroinflatednbinomial1", + "zeroinflatedpoisson0", + "zeroinflatedpoisson1" + ), msg = paste(analysis@Model$.args$family, "not handled yet") ) log_expected_ratio <- log(expected_ratio) @@ -51,12 +65,15 @@ setMethod( } parameter <- get_model_parameter( - analysis = analysis, verbose = verbose, ... + analysis = analysis, + verbose = verbose, + ... ) if (status(analysis) != "converged") { return( new( - "n2kAnomaly", Parameter = parameter@Parameter, + "n2kAnomaly", + Parameter = parameter@Parameter, ParameterEstimate = parameter@ParameterEstimate ) ) @@ -68,15 +85,18 @@ setMethod( description = c( "Large ratio of observed vs expected", "Small ratio of observed vs expected", - "Zero observed and high expected", "Unstable imputations" + "Zero observed and high expected", + "Unstable imputations" ) ) |> mutate( - fingerprint = map_chr(.data$description, ~sha1(c(description = .x))) + fingerprint = map_chr(.data$description, ~ sha1(c(description = .x))) ) anomaly <- tibble( - anomaly_type = character(0), analysis = character(0), - parameter = character(0), observation = character(0), + anomaly_type = character(0), + analysis = character(0), + parameter = character(0), + observation = character(0), datafield = character(0) ) @@ -108,7 +128,8 @@ setMethod( display(verbose, ": observed > 0 vs fit", FALSE) data |> filter( - is.finite(.data$log_ratio), .data$log_ratio > log_expected_ratio + is.finite(.data$log_ratio), + .data$log_ratio > log_expected_ratio ) |> select("analysis", "parameter", "observation", "datafield") |> slice_head(n = n) |> @@ -120,7 +141,8 @@ setMethod( bind_rows(anomaly) -> anomaly data |> filter( - is.finite(.data$log_ratio), .data$log_ratio < -log_expected_ratio + is.finite(.data$log_ratio), + .data$log_ratio < -log_expected_ratio ) |> select("analysis", "parameter", "observation", "datafield") |> slice_head(n = n) |> @@ -172,7 +194,7 @@ setMethod( distinct(.data$anomaly_type) |> select(description = "anomaly_type") |> mutate( - fingerprint = map_chr(.data$description, ~sha1(c(description = .x))) + fingerprint = map_chr(.data$description, ~ sha1(c(description = .x))) ) |> bind_rows(anomaly_type) anomaly <- re_anomaly |> @@ -189,7 +211,8 @@ setMethod( semi_join(x = parameter@Parameter, by = c("parent" = "fingerprint")) |> select("fingerprint", observation = "description") |> inner_join( - x = parameter@ParameterEstimate, by = c("parameter" = "fingerprint") + x = parameter@ParameterEstimate, + by = c("parameter" = "fingerprint") ) |> mutate(anomaly_type = parent$fingerprint) anomaly <- anomaly |> diff --git a/R/get_datafield_id.R b/R/get_datafield_id.R index 6836271d..7b69e449 100644 --- a/R/get_datafield_id.R +++ b/R/get_datafield_id.R @@ -8,19 +8,29 @@ #' @importFrom git2rdata is_git2rmeta update_metadata 
verify_vc write_vc get_datafield_id <- function(table, field, datasource, root, stage = FALSE) { assert_that( - is.string(table), is.string(field), is.string(datasource), noNA(table), - noNA(field), noNA(datasource) + is.string(table), + is.string(field), + is.string(datasource), + noNA(table), + noNA(field), + noNA(datasource) ) if (!is_git2rmeta(file = "datafield", root = root)) { data.frame(id = 1L, table = table, field = field, source = datasource) |> write_vc(file = "datafield", root = root, sorting = "id", stage = stage) update_metadata( - file = "datafield", root = root, stage = stage, name = "datafield", + file = "datafield", + root = root, + stage = stage, + name = "datafield", title = "Pointer to external code identifiers", - description = - "This dataset describes the external code identifiers used in the data. -It points to the original source of the external code: which datasource, which -table in that datasource and which field in that table.", + description = paste( + "This dataset describes the external code identifiers used in the", + "data.", + "It points to the original source of the external code: which", + "datasource, which table in that datasource and which field in that", + "table." + ), field_description = c( id = "unique identifier of the datafield", table = "table name of the identifier", @@ -31,11 +41,13 @@ table in that datasource and which field in that table.", return(1L) } datafield <- verify_vc( - file = "datafield", root = root, + file = "datafield", + root = root, variables = c("id", "table", "field", "source") ) which( - datafield$table == table & datafield$field == field & + datafield$table == table & + datafield$field == field & datafield$source == datasource ) -> relevant stopifnot("multiple matching datafield id found" = length(relevant) <= 1) diff --git a/R/get_model_parameter_n2k_aggregated.R b/R/get_model_parameter_n2k_aggregated.R index 6be40429..449af020 100644 --- a/R/get_model_parameter_n2k_aggregated.R +++ b/R/get_model_parameter_n2k_aggregated.R @@ -15,7 +15,8 @@ setMethod( } parameter <- data.frame( - description = "AggregatedImputed", parent = NA_character_, + description = "AggregatedImputed", + parent = NA_character_, fingerprint = sha1(c("AggregatedImputed", NA_character_)), stringsAsFactors = FALSE ) @@ -33,8 +34,9 @@ setMethod( mutate( description = i, fingerprint = map2_chr( - .data$description, .data$parent, - ~sha1(c(description = .x, parent = .y)) + .data$description, + .data$parent, + ~ sha1(c(description = .x, parent = .y)) ) ) observations <- observations %>% @@ -50,8 +52,9 @@ setMethod( distinct() %>% mutate( fingerprint = map2_chr( - .data$description, .data$parent, - ~sha1(c(description = .x, parent = .y)) + .data$description, + .data$parent, + ~ sha1(c(description = .x, parent = .y)) ) ) link <- c("parent", "description") @@ -69,7 +72,9 @@ setMethod( t() %>% as.data.frame() %>% select( - estimate = 1, lower_confidence_limit = 2, upper_confidence_limit = 3 + estimate = 1, + lower_confidence_limit = 2, + upper_confidence_limit = 3 ) %>% mutate( analysis = get_file_fingerprint(analysis), diff --git a/R/get_model_parameter_n2k_composite.R b/R/get_model_parameter_n2k_composite.R index 8c0deb90..e4c2a7a7 100644 --- a/R/get_model_parameter_n2k_composite.R +++ b/R/get_model_parameter_n2k_composite.R @@ -10,13 +10,15 @@ setMethod( } parameter <- data.frame( - description = "Composite index", parent = NA_character_, + description = "Composite index", + parent = NA_character_, stringsAsFactors = FALSE ) parameter$fingerprint <- 
apply(parameter, 1, sha1) parameter_estimate <- cbind( - analysis = analysis@AnalysisMetadata$file_fingerprint, analysis@Index, + analysis = analysis@AnalysisMetadata$file_fingerprint, + analysis@Index, stringsAsFactors = FALSE ) colnames(parameter_estimate)[2] <- "parameter" @@ -24,7 +26,8 @@ setMethod( extra <- data.frame( description = parameter_estimate$parameter, - parent = parameter$fingerprint, stringsAsFactors = FALSE + parent = parameter$fingerprint, + stringsAsFactors = FALSE ) extra$fingerprint <- apply(extra, 1, sha1) parameter <- rbind(parameter, extra) @@ -32,7 +35,8 @@ setMethod( parameter_estimate$parameter <- extra$fingerprint new( - "n2kParameter", Parameter = parameter, + "n2kParameter", + Parameter = parameter, ParameterEstimate = parameter_estimate ) } diff --git a/R/get_model_parameter_n2k_hurdle_imputed.R b/R/get_model_parameter_n2k_hurdle_imputed.R index e1cb9692..72cead35 100644 --- a/R/get_model_parameter_n2k_hurdle_imputed.R +++ b/R/get_model_parameter_n2k_hurdle_imputed.R @@ -15,7 +15,8 @@ setMethod( } parameter <- data.frame( - description = "HurdleImputed", parent = NA_character_, + description = "HurdleImputed", + parent = NA_character_, fingerprint = sha1(c("HurdleImputed", NA_character_)), stringsAsFactors = FALSE ) @@ -30,8 +31,9 @@ setMethod( mutate( description = i, fingerprint = map2_chr( - .data$description, .data$parent, - ~sha1(c(description = .x, parent = .y)) + .data$description, + .data$parent, + ~ sha1(c(description = .x, parent = .y)) ) ) observations <- observations |> @@ -43,8 +45,9 @@ setMethod( distinct() |> mutate( fingerprint = map2_chr( - .data$description, .data$parent, - ~sha1(c(description = .x, parent = .y)) + .data$description, + .data$parent, + ~ sha1(c(description = .x, parent = .y)) ) ) link <- c("parent", "description") @@ -55,13 +58,16 @@ setMethod( parameter <- bind_rows(parameter, extra) } new( - "n2kParameter", Parameter = parameter, + "n2kParameter", + Parameter = parameter, ParameterEstimate = analysis@Hurdle@Imputation |> apply(1, quantile, c(0.5, 0.025, 0.975)) |> t() |> as.data.frame() |> select( - estimate = 1, lower_confidence_limit = 2, upper_confidence_limit = 3 + estimate = 1, + lower_confidence_limit = 2, + upper_confidence_limit = 3 ) |> mutate( analysis = get_file_fingerprint(analysis), diff --git a/R/get_model_parameter_n2k_inla.R b/R/get_model_parameter_n2k_inla.R index 8275922e..2082789e 100644 --- a/R/get_model_parameter_n2k_inla.R +++ b/R/get_model_parameter_n2k_inla.R @@ -19,15 +19,20 @@ setMethod( } parameter <- tibble( description = c( - "Fixed effect", "Random effect BLUP", "Random effect variance", - "Fitted", "Overdispersion", "WAIC", "Imputed value" + "Fixed effect", + "Random effect BLUP", + "Random effect variance", + "Fitted", + "Overdispersion", + "WAIC", + "Imputed value" ), parent = NA_character_ ) %>% mutate( fingerprint = map_chr( .data$description, - ~sha1(c(description = .x, parent = NA_character_)) + ~ sha1(c(description = .x, parent = NA_character_)) ) ) @@ -35,15 +40,18 @@ setMethod( display(verbose, " reading model parameters: fixed effects", FALSE) variable <- c( - "Intercept", attr(terms(analysis@AnalysisFormula[[1]]), "term.labels") + "Intercept", + attr(terms(analysis@AnalysisFormula[[1]]), "term.labels") ) variable <- variable[!grepl("f\\(", variable)] fixed <- get_model(analysis)$summary.fixed if (nrow(fixed) == 0) { parameter_estimate <- data.frame( - analysis = character(0), parameter = character(0), - estimate = numeric(0), lower_confidence_limit = numeric(0), + analysis = 
character(0), + parameter = character(0), + estimate = numeric(0), + lower_confidence_limit = numeric(0), upper_confidence_limit = numeric(0) ) } else { @@ -65,11 +73,13 @@ setMethod( parameter_estimate <- model_parameter_main( parameter_estimate = parameter_estimate, - main_effect = variable[!interaction], fixed_parent = fixed_parent + main_effect = variable[!interaction], + fixed_parent = fixed_parent ) parameter_estimate <- model_parameter_interaction( parameter_estimate = parameter_estimate, - interaction = variable[interaction], fixed_parent = fixed_parent + interaction = variable[interaction], + fixed_parent = fixed_parent ) parameter <- attr(parameter_estimate, "parameter") @@ -80,7 +90,8 @@ setMethod( re_names <- re_names[grepl("^Precision for ", re_names)] if (length(re_names) > 0) { map_df( - get_model(analysis)$marginals.hyperpar[re_names], inla_inverse + get_model(analysis)$marginals.hyperpar[re_names], + inla_inverse ) %>% mutate( parameter = gsub("^Precision for ", "", re_names), @@ -88,14 +99,16 @@ setMethod( ) -> re_variance parameter %>% filter( - is.na(.data$parent), .data$description == "Random effect variance" + is.na(.data$parent), + .data$description == "Random effect variance" ) %>% select(parent = "fingerprint") %>% merge(tibble(description = re_variance$parameter)) %>% mutate( fingerprint = map2_chr( - .data$description, .data$parent, - ~sha1(c(description = .x, parent = .y)) + .data$description, + .data$parent, + ~ sha1(c(description = .x, parent = .y)) ) ) -> extra extra %>% @@ -136,7 +149,8 @@ setMethod( tibble( estimate = get_model(analysis)$waic$waic, analysis = analysis@AnalysisMetadata$file_fingerprint, - parameter = parent$fingerprint, lower_confidence_limit = NA_real_, + parameter = parent$fingerprint, + lower_confidence_limit = NA_real_, upper_confidence_limit = NA_real_ ) ) @@ -151,7 +165,8 @@ setMethod( get_model(analysis)$summary.random[[i]] %>% transmute( analysis = analysis@AnalysisMetadata$file_fingerprint, - parent = i, parameter = as.character(.data$ID), + parent = i, + parameter = as.character(.data$ID), estimate = .data$mean, lower_confidence_limit = .data$`0.025quant`, upper_confidence_limit = .data$`0.975quant` @@ -202,7 +217,7 @@ setMethod( fingerprint = map2_chr( .data$description, .data$parent, - ~sha1(c(description = .x, parent = .y)) + ~ sha1(c(description = .x, parent = .y)) ) ) parameter <- blup_parent %>% @@ -225,7 +240,7 @@ setMethod( fingerprint = map2_chr( .data$description, .data$parent, - ~sha1(c(description = .x, parent = .y)) + ~ sha1(c(description = .x, parent = .y)) ) ) parameter <- bind_rows(parameter, blup_parent) @@ -250,7 +265,7 @@ setMethod( fingerprint = map2_chr( .data$description, .data$parent, - ~sha1(c(description = .x, parent = .y)) + ~ sha1(c(description = .x, parent = .y)) ) ) %>% bind_rows(parameter) @@ -290,7 +305,7 @@ setMethod( fingerprint = map2_chr( .data$description, .data$parent, - ~sha1(c(description = .x, parent = .y)) + ~ sha1(c(description = .x, parent = .y)) ) ) %>% bind_rows(parameter) -> parameter @@ -309,10 +324,18 @@ setMethod( observation_id = as.character(.data$observation_id), analysis = get_file_fingerprint(analysis), estimate = apply(ri@Imputation, 1, quantile, probs = 0.500), - lower_confidence_limit = - apply(ri@Imputation, 1, quantile, probs = 0.025), - upper_confidence_limit = - apply(ri@Imputation, 1, quantile, probs = 0.975) + lower_confidence_limit = apply( + ri@Imputation, + 1, + quantile, + probs = 0.025 + ), + upper_confidence_limit = apply( + ri@Imputation, + 1, + quantile, + probs 
= 0.975 + ) ) parent <- parameter %>% filter(.data$description == "Imputed value", is.na(.data$parent)) @@ -322,18 +345,23 @@ setMethod( parent = parent$fingerprint, description = .data$observation_id, fingerprint = map2_chr( - .data$description, .data$parent, - ~sha1(c(description = .x, parent = .y)) + .data$description, + .data$parent, + ~ sha1(c(description = .x, parent = .y)) ) ) parameter <- bind_rows(parameter, impute_parameter) parameter_estimate <- extra %>% inner_join( - impute_parameter, by = c("observation_id" = "description") + impute_parameter, + by = c("observation_id" = "description") ) %>% select( - "analysis", "estimate", "lower_confidence_limit", - "upper_confidence_limit", parameter = "fingerprint" + "analysis", + "estimate", + "lower_confidence_limit", + "upper_confidence_limit", + parameter = "fingerprint" ) %>% bind_rows(parameter_estimate) } @@ -347,7 +375,9 @@ setMethod( ) model_parameter_main <- function( - parameter_estimate, main_effect, fixed_parent + parameter_estimate, + main_effect, + fixed_parent ) { attr(parameter_estimate, "parameter") -> parameter for (i in main_effect) { @@ -364,7 +394,7 @@ model_parameter_main <- function( fingerprint = map2_chr( .data$description, .data$parent, - ~sha1(c(description = .x, parent = .y)) + ~ sha1(c(description = .x, parent = .y)) ) ) extra_factor <- tibble( @@ -385,7 +415,7 @@ model_parameter_main <- function( fingerprint = map2_chr( .data$description, .data$parent, - ~sha1(c(description = .x, parent = .y)) + ~ sha1(c(description = .x, parent = .y)) ) ) extra_factor %>% @@ -399,7 +429,9 @@ model_parameter_main <- function( ) %>% mutate( parameter = ifelse( - is.na(.data$fingerprint), .data$parameter, .data$fingerprint + is.na(.data$fingerprint), + .data$parameter, + .data$fingerprint ) ) %>% select(-"fingerprint") -> parameter_estimate @@ -410,7 +442,9 @@ model_parameter_main <- function( } model_parameter_interaction <- function( - parameter_estimate, interaction, fixed_parent + parameter_estimate, + interaction, + fixed_parent ) { attr(parameter_estimate, "parameter") -> parameter for (i in interaction) { @@ -427,7 +461,7 @@ model_parameter_interaction <- function( fingerprint = map2_chr( .data$description, .data$parent, - ~sha1(c(description = .x, parent = .y)) + ~ sha1(c(description = .x, parent = .y)) ) ) parts <- strsplit(i, ":")[[1]] @@ -448,7 +482,7 @@ model_parameter_interaction <- function( fingerprint = map2_chr( .data$description, .data$parent, - ~sha1(c(description = .x, parent = .y)) + ~ sha1(c(description = .x, parent = .y)) ) ) if (nrow(extra_factor) > 0) { diff --git a/R/get_model_parameter_n2k_model_imputed.R b/R/get_model_parameter_n2k_model_imputed.R index 4c8df43d..d91293ac 100644 --- a/R/get_model_parameter_n2k_model_imputed.R +++ b/R/get_model_parameter_n2k_model_imputed.R @@ -12,7 +12,8 @@ setMethod( return(new("n2kParameter")) } parent <- data.frame( - description = "ModelImputed", parent = NA_character_, + description = "ModelImputed", + parent = NA_character_, fingerprint = sha1(c("ModelImputed", NA_character_)), stringsAsFactors = FALSE ) @@ -23,7 +24,9 @@ setMethod( ) %>% mutate( fingerprint = map2_chr( - .data$parent, .data$description, ~sha1(c(.x, .y)) + .data$parent, + .data$description, + ~ sha1(c(.x, .y)) ) ) new( @@ -37,8 +40,10 @@ setMethod( ) %>% transmute( analysis = get_file_fingerprint(analysis), - parameter = .data$fingerprint, estimate = .data$Estimate, - lower_confidence_limit = .data$LCL, upper_confidence_limit = .data$UCL + parameter = .data$fingerprint, + estimate = 
.data$Estimate, + lower_confidence_limit = .data$LCL, + upper_confidence_limit = .data$UCL ) %>% as.data.frame() ) diff --git a/R/get_result_character.R b/R/get_result_character.R index 48b4da7d..d44571b4 100644 --- a/R/get_result_character.R +++ b/R/get_result_character.R @@ -8,7 +8,11 @@ setMethod( definition = function(x, base, ..., project, verbose = TRUE) { # check arguments assert_that( - is.string(x), is.string(base), is.string(project), noNA(x), noNA(base), + is.string(x), + is.string(base), + is.string(project), + noNA(x), + noNA(base), noNA(project) ) stopifnot("`base` is not a existing directory" = file_test("-d", base)) @@ -26,7 +30,10 @@ setMethod( read_model(x = x, base = base, project = project) |> get_result( - base = base, project = project, ..., verbose = verbose + base = base, + project = project, + ..., + verbose = verbose ) -> result if (status(result) == "converged") { saveRDS(result, file = target) @@ -64,7 +71,11 @@ setMethod( "object not found or multiple objects found" = length(available) == 1 ) get_result( - available[[1]], base = base, project = project, verbose = verbose, ... + available[[1]], + base = base, + project = project, + verbose = verbose, + ... ) } ) diff --git a/R/get_result_n2k_inla.R b/R/get_result_n2k_inla.R index 5c51ad70..b43ea99f 100644 --- a/R/get_result_n2k_inla.R +++ b/R/get_result_n2k_inla.R @@ -41,12 +41,14 @@ setMethod( } } contrast <- tibble( - description = description, analysis = get_file_fingerprint(x) + description = description, + analysis = get_file_fingerprint(x) ) %>% mutate( fingerprint = map2_chr( - .data$description, .data$analysis, - ~sha1(c(description = .x, analysis = .y)) + .data$description, + .data$analysis, + ~ sha1(c(description = .x, analysis = .y)) ) ) %>% select("fingerprint", "description", "analysis") %>% @@ -106,10 +108,13 @@ setMethod( left_join(anomaly@Parameter, by = "parent") %>% transmute( parameter = ifelse( - is.na(.data$fingerprint), .data$parent, .data$fingerprint + is.na(.data$fingerprint), + .data$parent, + .data$fingerprint ), parameter_id = concat( - child = .data$description, parent = .data$parent_description + child = .data$description, + parent = .data$parent_description ) ) @@ -120,7 +125,8 @@ setMethod( as.data.frame() %>% rownames_to_column("description") %>% pivot_longer( - names_to = "parameter_id", values_to = "coefficient", + names_to = "parameter_id", + values_to = "coefficient", colnames(contrast_coefficient)[ !grepl("description", colnames(contrast_coefficient)) ], @@ -177,9 +183,11 @@ setMethod( lc %>% mutate(contrast = contrast$fingerprint) %>% pivot_longer( - names_to = "description", values_to = "coefficient", + names_to = "description", + values_to = "coefficient", colnames(lc)[!grepl("contrast", colnames(lc))], - values_drop_na = TRUE, names_transform = factor + values_drop_na = TRUE, + names_transform = factor ) %>% mutate( description = as.character( @@ -192,7 +200,8 @@ setMethod( lc %>% mutate(contrast = contrast$fingerprint) %>% pivot_longer( - names_to = "description", values_to = "coefficient", + names_to = "description", + values_to = "coefficient", colnames(lc)[!grepl("contrast", colnames(lc))], values_drop_na = TRUE ) %>% @@ -205,7 +214,9 @@ setMethod( ) %>% mutate( description = sprintf( - "%s:%s", .data$main, .data$description + "%s:%s", + .data$main, + .data$description ) ) %>% select(parameter = "fingerprint", "description"), @@ -226,7 +237,8 @@ setMethod( lc <- x@Model$summary.lincomb } contrast_estimate <- tibble( - description = rownames(lc), estimate = lc$mean, + 
description = rownames(lc), + estimate = lc$mean, lower_confidence_limit = lc[, "0.025quant"], upper_confidence_limit = lc[, "0.975quant"] ) %>% @@ -236,7 +248,9 @@ setMethod( by = "description" ) %>% select( - contrast = "fingerprint", "estimate", "lower_confidence_limit", + contrast = "fingerprint", + "estimate", + "lower_confidence_limit", "upper_confidence_limit" ) %>% arrange(.data$contrast) %>% diff --git a/R/get_result_n2kmanifest.R b/R/get_result_n2kmanifest.R index 183089c3..dbdaa6e8 100644 --- a/R/get_result_n2kmanifest.R +++ b/R/get_result_n2kmanifest.R @@ -15,8 +15,10 @@ setMethod( FUN = function(hash, base, verbose, ...) { list(get_result(x = hash, base = base, ..., verbose = verbose)) }, - FUN.VALUE = vector(mode = "list", length = 1), base = base, - verbose = verbose, ... + FUN.VALUE = vector(mode = "list", length = 1), + base = base, + verbose = verbose, + ... ) |> do.call(what = combine) } @@ -35,13 +37,16 @@ setMethod( assert_that(validObject(x)) display(verbose = verbose, paste("Handle manifest", x@Fingerprint)) get_bucket( - bucket = base, prefix = file.path(project, "results"), max = Inf + bucket = base, + prefix = file.path(project, "results"), + max = Inf ) |> map_chr("Key") |> basename() -> done to_do <- order_manifest(manifest = x) vapply( - to_do[!paste0(to_do, ".rds") %in% done], FUN.VALUE = logical(1), + to_do[!paste0(to_do, ".rds") %in% done], + FUN.VALUE = logical(1), FUN = function(x, base, verbose, project, ...) { display(verbose = verbose, paste(" extracting", x)) substring(x, 1, 4) |> @@ -55,23 +60,37 @@ setMethod( "object not found or multiple objects found" = length(available) == 1 ) get_result( - available[[1]], base = base, verbose = verbose, project = project, ... + available[[1]], + base = base, + verbose = verbose, + project = project, + ... ) gc(verbose = FALSE) return(TRUE) }, - base = base, verbose = verbose, project = project, ... + base = base, + verbose = verbose, + project = project, + ... ) order_manifest(manifest = x) |> vapply( FUN = function(hash, base, project, verbose, ...) { get_result( - x = hash, base = base, project = project, ..., verbose = verbose + x = hash, + base = base, + project = project, + ..., + verbose = verbose ) |> list() }, - FUN.VALUE = vector(mode = "list", length = 1), base = base, - verbose = verbose, project = project, ... + FUN.VALUE = vector(mode = "list", length = 1), + base = base, + verbose = verbose, + project = project, + ... ) |> do.call(what = combine) } diff --git a/R/inla_inverse.R b/R/inla_inverse.R index 106d62e5..2a328c0e 100644 --- a/R/inla_inverse.R +++ b/R/inla_inverse.R @@ -27,7 +27,9 @@ inla_inverse <- function(marginal) { if (inherits(estimate, "try-error")) { return( data.frame( - estimate = NA, lower_confidence_limit = NA, upper_confidence_limit = NA + estimate = NA, + lower_confidence_limit = NA, + upper_confidence_limit = NA ) ) } diff --git a/R/make_a.R b/R/make_a.R index b0216e1c..e8a2c72a 100644 --- a/R/make_a.R +++ b/R/make_a.R @@ -26,8 +26,10 @@ setMethod( all(colnames(object@Coordinates) %in% colnames(data)) ) stopifnot( - "INLA package required but not installed." = - requireNamespace("INLA", quietly = TRUE) + "INLA package required but not installed." 
= requireNamespace( + "INLA", + quietly = TRUE + ) ) data[colnames(object@Coordinates)] |> as.matrix() |> diff --git a/R/manifest_yaml_to_bash.R b/R/manifest_yaml_to_bash.R index fb40a147..cbb4c008 100644 --- a/R/manifest_yaml_to_bash.R +++ b/R/manifest_yaml_to_bash.R @@ -20,8 +20,14 @@ setGeneric( name = "manifest_yaml_to_bash", def = function( - base, project, hash, shutdown = FALSE, split = 1, - status = c("new", "waiting"), limit = FALSE, timeout = 4 + base, + project, + hash, + shutdown = FALSE, + split = 1, + status = c("new", "waiting"), + limit = FALSE, + timeout = 4 ) { standardGeneric("manifest_yaml_to_bash") # nocov } @@ -38,12 +44,24 @@ setMethod( f = "manifest_yaml_to_bash", signature = signature(base = "s3_bucket"), definition = function( - base, project, hash, shutdown = FALSE, split = 1, - status = c("new", "waiting"), limit = FALSE, timeout = 4 + base, + project, + hash, + shutdown = FALSE, + split = 1, + status = c("new", "waiting"), + limit = FALSE, + timeout = 4 ) { assert_that( - is.string(project), noNA(project), is.flag(shutdown), noNA(shutdown), - is.count(split), is.flag(limit), noNA(limit), is.count(timeout) + is.string(project), + noNA(project), + is.flag(shutdown), + noNA(shutdown), + is.count(split), + is.flag(limit), + noNA(limit), + is.count(timeout) ) if (missing(hash)) { paste(project, "yaml", sep = "/") |> @@ -73,19 +91,30 @@ setMethod( to_do <- object_status(base = base, project = project, status = status) models <- models[models %in% to_do] model_scripts <- create_docker_model_scripts( - models = models, base = base, timeout = timeout, - limit = limit, volume = volume, docker_hash = docker_hash, + models = models, + base = base, + timeout = timeout, + limit = limit, + volume = volume, + docker_hash = docker_hash, project = project ) vapply( - seq_len(split), FUN.VALUE = character(1), project = project, init = init, - split = split, shutdown = shutdown, base = base, + seq_len(split), + FUN.VALUE = character(1), + project = project, + init = init, + split = split, + shutdown = shutdown, + base = base, FUN = function(i, project, split, init, shutdown, base) { script <- path( - project, sprintf("bash/%s_%i.sh", docker_hash, i) + project, + sprintf("bash/%s_%i.sh", docker_hash, i) ) c( - init, model_scripts[seq_along(model_scripts) %% split == (i - 1)], + init, + model_scripts[seq_along(model_scripts) %% split == (i - 1)], "sudo shutdown -h now"[shutdown] ) |> s3write_using(writeLines, object = script, bucket = base) @@ -97,7 +126,8 @@ setMethod( create_docker_init <- function(yaml, docker_hash) { sprintf( - "RUN Rscript -e 'pak::pkg_install(\\\"%s\\\"%s)'", yaml$github, + "RUN Rscript -e 'pak::pkg_install(\\\"%s\\\"%s)'", + yaml$github, ", dependencies = FALSE, upgrade = FALSE, ask = FALSE" ) -> deps sprintf( @@ -107,12 +137,20 @@ echo \"FROM %s %s\" > Dockerfile docker build --pull --tag rn2k:%s . 
rm Dockerfile", - yaml$docker, paste(deps, collapse = "\n"), docker_hash + yaml$docker, + paste(deps, collapse = "\n"), + docker_hash ) } create_docker_model_scripts <- function( - models, base, timeout = 4, limit = FALSE, volume, docker_hash, project + models, + base, + timeout = 4, + limit = FALSE, + volume, + docker_hash, + project ) { if (inherits(base, "character")) { script <- "./fit_model_file.sh" @@ -123,20 +161,31 @@ create_docker_model_scripts <- function( c( "echo \"\n\nmodel %i of %i\n\n\"\ndate\n", "timeout --kill-after=2m %ih docker run %s --name=%s -v %s rn2k:%s", - script, " -b %s -p %s -m %s%s" + script, + " -b %s -p %s -m %s%s" ) |> paste(collapse = " ") |> sprintf( - seq_along(models), length(models), timeout, + seq_along(models), + length(models), + timeout, paste( c( - "--rm", "--env AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID", + "--rm", + "--env AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID", "--env AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY", "--env AWS_DEFAULT_REGION=$AWS_DEFAULT_REGION", - "--cap-add NET_ADMIN"[limit], "--cpu-shares=512"[limit] + "--cap-add NET_ADMIN"[limit], + "--cpu-shares=512"[limit] ), collapse = " " - ), models, volume, docker_hash, base, project, models, + ), + models, + volume, + docker_hash, + base, + project, + models, ifelse(limit, " -s 1", "") ) } @@ -153,18 +202,31 @@ setMethod( f = "manifest_yaml_to_bash", signature = signature(base = "character"), definition = function( - base, project, hash, shutdown = FALSE, split = 1, - status = c("new", "waiting"), limit = FALSE, timeout = 4 + base, + project, + hash, + shutdown = FALSE, + split = 1, + status = c("new", "waiting"), + limit = FALSE, + timeout = 4 ) { assert_that( - is.string(base), noNA(base), file_test("-d", base), is.string(project), - noNA(project), is.flag(shutdown), noNA(shutdown), is.count(split), - is.flag(limit), noNA(limit) + is.string(base), + noNA(base), + file_test("-d", base), + is.string(project), + noNA(project), + is.flag(shutdown), + noNA(shutdown), + is.count(split), + is.flag(limit), + noNA(limit) ) assert_that(split == 1, msg = "`split > 1` to do on local file systems.") assert_that( file_test("-d", base), - msg = sprintf("`%s` is not an existing folder", base) + msg = sprintf("`%s` is not an existing folder", base) ) path(base, project, "yaml") |> dir.create(showWarnings = FALSE) @@ -194,8 +256,13 @@ setMethod( volume <- paste(base, base, "rw", sep = ":") models <- order_manifest(manifest = manifest) model_scripts <- create_docker_model_scripts( - models = models, base = base, timeout = timeout, limit = limit, - volume = volume, docker_hash = docker_hash, project = project + models = models, + base = base, + timeout = timeout, + limit = limit, + volume = volume, + docker_hash = docker_hash, + project = project ) path(base, project, "bash") |> dir_create() @@ -243,7 +310,9 @@ order_manifest <- function(manifest) { #' @importFrom purrr map_chr object_status <- function(base, project, status = c("new", "waiting"), hash) { assert_that( - inherits(base, "s3_bucket"), is.character(status), length(status) > 0, + inherits(base, "s3_bucket"), + is.character(status), + length(status) > 0, is.string(project) ) if (missing(hash)) { diff --git a/R/moving_average.R b/R/moving_average.R index 57f3d843..b231b576 100644 --- a/R/moving_average.R +++ b/R/moving_average.R @@ -18,12 +18,16 @@ moving_average <- function(n_year, duration, first_year = 0) { FUN.VALUE = vector(mode = "numeric", length = n_year), FUN = function(i, trend_coef, n_year) { c(rep(0, i), trend_coef, rep(0, n_year - 
length(trend_coef) - i)) - }, trend_coef = rep(1 / duration, duration), n_year = n_year + }, + trend_coef = rep(1 / duration, duration), + n_year = n_year ) |> `colnames<-`( sprintf( "average_%.1f_%i", - seq_len(n_year - duration + 1) + median(seq_len(duration)) - 2 + + seq_len(n_year - duration + 1) + + median(seq_len(duration)) - + 2 + first_year, duration ) diff --git a/R/moving_difference.R b/R/moving_difference.R index 33b893d4..77e90123 100644 --- a/R/moving_difference.R +++ b/R/moving_difference.R @@ -26,11 +26,15 @@ moving_difference <- function(n_year, duration, first_year = 1) { FUN.VALUE = vector(mode = "numeric", length = n_year), FUN = function(i, trend_coef, n_year, extra_zero) { c( - rep(0, extra_zero[i, 1]), -trend_coef, + rep(0, extra_zero[i, 1]), + -trend_coef, rep(0, n_year - 2 * length(trend_coef) - sum(extra_zero[i, ])), - trend_coef, rep(0, extra_zero[i, 2]) + trend_coef, + rep(0, extra_zero[i, 2]) ) - }, trend_coef = rep(1 / duration, duration), n_year = n_year, + }, + trend_coef = rep(1 / duration, duration), + n_year = n_year, extra_zero = extra_zero ) |> `colnames<-`( diff --git a/R/moving_trend.R b/R/moving_trend.R index 8cd35779..89912c74 100644 --- a/R/moving_trend.R +++ b/R/moving_trend.R @@ -25,12 +25,15 @@ moving_trend <- function(n_year, duration, first_year = 0) { function(i, trend_coef) { c(rep(0, i - 1), trend_coef, rep(0, n_year - duration - i + 1)) }, - numeric(n_year), trend_coef = trend_coef / sum(trend_coef ^ 2) + numeric(n_year), + trend_coef = trend_coef / sum(trend_coef^2) ) |> `colnames<-`( sprintf( "trend_%.1f_%i", - seq_len(n_year - duration + 1) + median(seq_len(duration)) - 2 + + seq_len(n_year - duration + 1) + + median(seq_len(duration)) - + 2 + first_year, duration ) diff --git a/R/n2k_aggregate_class.R b/R/n2k_aggregate_class.R index fac994c9..cff40ea9 100644 --- a/R/n2k_aggregate_class.R +++ b/R/n2k_aggregate_class.R @@ -45,7 +45,8 @@ setValidity( object@AnalysisMetadata$scheme_id, object@AnalysisMetadata$species_group_id, object@AnalysisMetadata$location_group_id, - object@AnalysisMetadata$model_type, object@AnalysisMetadata$formula, + object@AnalysisMetadata$model_type, + object@AnalysisMetadata$formula, object@AnalysisMetadata$first_imported_year, object@AnalysisMetadata$last_imported_year, object@AnalysisMetadata$duration, @@ -53,7 +54,9 @@ setValidity( format(object@AnalysisMetadata$analysis_date, tz = "UTC"), object@AnalysisMetadata$seed, object@AnalysisRelation$parent_analysis, - object@Function, object@Filter, object@Join + object@Function, + object@Filter, + object@Join ), environment = FALSE ) @@ -67,9 +70,13 @@ setValidity( list( object@AnalysisMetadata$file_fingerprint, object@AnalysisMetadata$status, - object@AnalysisMetadata$analysis_version, object@AnalysisVersion, - object@RPackage, object@AnalysisVersionRPackage, - object@AnalysisRelation, object@RawImputed, object@AggregatedImputed + object@AnalysisMetadata$analysis_version, + object@AnalysisVersion, + object@RPackage, + object@AnalysisVersionRPackage, + object@AnalysisRelation, + object@RawImputed, + object@AggregatedImputed ), digits = 6L ) diff --git a/R/n2k_aggregated.R b/R/n2k_aggregated.R index 71480e09..072fe1fe 100644 --- a/R/n2k_aggregated.R +++ b/R/n2k_aggregated.R @@ -93,11 +93,21 @@ setMethod( file_fingerprint <- sha1( list( dots$result_datasource_id, - dots$scheme_id, dots$species_group_id, dots$location_group_id, - dots$model_type, dots$formula, dots$first_imported_year, - dots$last_imported_year, dots$duration, dots$last_analysed_year, - 
format(dots$analysis_date, tz = "UTC"), dots$seed, dots$parent, - dots$fun, dots$filter, dots$join + dots$scheme_id, + dots$species_group_id, + dots$location_group_id, + dots$model_type, + dots$formula, + dots$first_imported_year, + dots$last_imported_year, + dots$duration, + dots$last_analysed_year, + format(dots$analysis_date, tz = "UTC"), + dots$seed, + dots$parent, + dots$fun, + dots$filter, + dots$join ), environment = FALSE ) @@ -127,9 +137,15 @@ setMethod( version <- get_analysis_version(sessionInfo()) status_fingerprint <- sha1( list( - file_fingerprint, dots$status, version@AnalysisVersion$fingerprint, - version@AnalysisVersion, version@RPackage, - version@AnalysisVersionRPackage, analysis_relation, NULL, NULL + file_fingerprint, + dots$status, + version@AnalysisVersion$fingerprint, + version@AnalysisVersion, + version@RPackage, + version@AnalysisVersionRPackage, + analysis_relation, + NULL, + NULL ), digits = 6L ) @@ -141,17 +157,22 @@ setMethod( AnalysisVersionRPackage = version@AnalysisVersionRPackage, AnalysisMetadata = data.frame( result_datasource_id = dots$result_datasource_id, - scheme_id = dots$scheme_id, species_group_id = dots$species_group_id, + scheme_id = dots$scheme_id, + species_group_id = dots$species_group_id, location_group_id = dots$location_group_id, - model_type = dots$model_type, formula = dots$formula, + model_type = dots$model_type, + formula = dots$formula, first_imported_year = dots$first_imported_year, last_imported_year = dots$last_imported_year, - duration = dots$duration, last_analysed_year = dots$last_analysed_year, - analysis_date = dots$analysis_date, seed = dots$seed, + duration = dots$duration, + last_analysed_year = dots$last_analysed_year, + analysis_date = dots$analysis_date, + seed = dots$seed, status = dots$status, analysis_version = version@AnalysisVersion$fingerprint, file_fingerprint = file_fingerprint, - status_fingerprint = status_fingerprint, stringsAsFactors = FALSE + status_fingerprint = status_fingerprint, + stringsAsFactors = FALSE ), AnalysisFormula = list(as.formula(dots$formula)), AnalysisRelation = analysis_relation, diff --git a/R/n2k_analysis_metadata_class.R b/R/n2k_analysis_metadata_class.R index 15ec997f..e783f32c 100644 --- a/R/n2k_analysis_metadata_class.R +++ b/R/n2k_analysis_metadata_class.R @@ -16,8 +16,10 @@ setClass( contains = "n2kAnalysisVersion", prototype = prototype( AnalysisRelation = data.frame( - analysis = character(0), parent_analysis = character(0), - parentstatus_fingerprint = character(0), parent_status = character(0), + analysis = character(0), + parent_analysis = character(0), + parentstatus_fingerprint = character(0), + parent_status = character(0), stringsAsFactors = FALSE ) ) @@ -72,8 +74,9 @@ setValidity( assert_that( length(object@AnalysisFormula) == nrow(object@AnalysisMetadata), - msg = + msg = paste( "Number of 'AnalysisFormula' not equal to number of 'AnalysisMetadata'" + ) ) if (inherits(object@AnalysisMetadata$formula, "character")) { assert_that( @@ -111,22 +114,27 @@ setValidity( if (any(object@AnalysisMetadata$last_imported_year > this_year)) { stop("last_imported_year from the future.") } - if (any( - object@AnalysisMetadata$first_imported_year > - object@AnalysisMetadata$last_imported_year - )) { + if ( + any( + object@AnalysisMetadata$first_imported_year > + object@AnalysisMetadata$last_imported_year + ) + ) { stop("first_imported_year cannot exceed last_imported_year") } - if (any( - object@AnalysisMetadata$last_analysed_year > - object@AnalysisMetadata$last_imported_year - )) { + 
if ( + any( + object@AnalysisMetadata$last_analysed_year > + object@AnalysisMetadata$last_imported_year + ) + ) { stop("last_analysed_year cannot exceed last_imported_year") } list( object@AnalysisMetadata$duration <= object@AnalysisMetadata$last_imported_year - - object@AnalysisMetadata$first_imported_year + 1 + object@AnalysisMetadata$first_imported_year + + 1 ) |> setNames( paste( @@ -139,7 +147,8 @@ setValidity( list( object@AnalysisMetadata$last_analysed_year >= object@AnalysisMetadata$first_imported_year + - object@AnalysisMetadata$duration - 1 + object@AnalysisMetadata$duration - + 1 ) |> setNames( paste( @@ -150,8 +159,15 @@ setValidity( do.call(what = stopifnot) ok_status <- c( - "new", "working", "waiting", "error", "converged", "false_convergence", - "unstable", "insufficient_data", "time-out" + "new", + "working", + "waiting", + "error", + "converged", + "false_convergence", + "unstable", + "insufficient_data", + "time-out" ) if (!all(object@AnalysisMetadata$status %in% ok_status)) { stop( diff --git a/R/n2k_analysis_version_class.R b/R/n2k_analysis_version_class.R index 01231abb..bfba1800 100644 --- a/R/n2k_analysis_version_class.R +++ b/R/n2k_analysis_version_class.R @@ -8,19 +8,25 @@ setClass( "n2kAnalysisVersion", representation = representation( - AnalysisVersion = "data.frame", RPackage = "data.frame", + AnalysisVersion = "data.frame", + RPackage = "data.frame", AnalysisVersionRPackage = "data.frame" ), prototype = prototype( AnalysisVersion = data.frame( - fingerprint = character(0), stringsAsFactors = FALSE + fingerprint = character(0), + stringsAsFactors = FALSE ), RPackage = data.frame( - fingerprint = character(0), description = character(0), - version = character(0), origin = character(0), stringsAsFactors = FALSE + fingerprint = character(0), + description = character(0), + version = character(0), + origin = character(0), + stringsAsFactors = FALSE ), AnalysisVersionRPackage = data.frame( - analysis_version = character(0), r_package = character(0), + analysis_version = character(0), + r_package = character(0), stringsAsFactors = FALSE ) ) @@ -32,7 +38,8 @@ setValidity( "n2kAnalysisVersion", function(object) { check_dataframe_variable( - df = object@AnalysisVersion, variable = "fingerprint", + df = object@AnalysisVersion, + variable = "fingerprint", name = "AnalysisVersion" ) check_dataframe_variable( diff --git a/R/n2k_composite.R b/R/n2k_composite.R index cc4472e4..8266cbcb 100644 --- a/R/n2k_composite.R +++ b/R/n2k_composite.R @@ -7,7 +7,8 @@ setGeneric( name = "n2k_composite", def = function( - parent_status, ... + parent_status, + ... 
) { standardGeneric("n2k_composite") # nocov } @@ -32,10 +33,22 @@ setMethod( f = "n2k_composite", signature = signature(parent_status = "data.frame"), definition = function( - parent_status, status = "waiting", result_datasource_id, scheme_id, - formula, species_group_id, location_group_id, model_type, - first_imported_year, last_imported_year, duration, last_analysed_year, - analysis_date, extractor, ..., seed + parent_status, + status = "waiting", + result_datasource_id, + scheme_id, + formula, + species_group_id, + location_group_id, + model_type, + first_imported_year, + last_imported_year, + duration, + last_analysed_year, + analysis_date, + extractor, + ..., + seed ) { assert_that( has_name(parent_status, "parent_analysis"), @@ -52,9 +65,13 @@ setMethod( seed <- as.integer(seed) } assert_that( - is.string(result_datasource_id), is.string(scheme_id), - is.string(species_group_id), is.string(location_group_id), - is.string(model_type), is.string(formula), is.count(first_imported_year), + is.string(result_datasource_id), + is.string(scheme_id), + is.string(species_group_id), + is.string(location_group_id), + is.string(model_type), + is.string(formula), + is.count(first_imported_year), is.count(last_imported_year) ) first_imported_year <- as.integer(first_imported_year) @@ -75,35 +92,58 @@ setMethod( assert_that(inherits(extractor, "function")) file_fingerprint <- sha1( list( - result_datasource_id, scheme_id, species_group_id, location_group_id, - model_type, formula, first_imported_year, last_imported_year, duration, - last_analysed_year, format(analysis_date, tz = "UTC"), seed, - parent_status$parent_analysis, formals(extractor), + result_datasource_id, + scheme_id, + species_group_id, + location_group_id, + model_type, + formula, + first_imported_year, + last_imported_year, + duration, + last_analysed_year, + format(analysis_date, tz = "UTC"), + seed, + parent_status$parent_analysis, + formals(extractor), as.character(body(extractor)) ) ) parent_status$analysis <- file_fingerprint parent_status <- parent_status %>% select( - "analysis", "parent_analysis", "parentstatus_fingerprint", + "analysis", + "parent_analysis", + "parentstatus_fingerprint", "parent_status" ) parameter <- data.frame( - parent = character(0), value = character(0), estimate = numeric(0), - variance = numeric(0), stringsAsFactors = FALSE + parent = character(0), + value = character(0), + estimate = numeric(0), + variance = numeric(0), + stringsAsFactors = FALSE ) index <- data.frame( - calue = character(0), estimate = numeric(0), - lower_confidence_limit = numeric(0), upper_confidence_limit = numeric(0), + calue = character(0), + estimate = numeric(0), + lower_confidence_limit = numeric(0), + upper_confidence_limit = numeric(0), stringsAsFactors = FALSE ) version <- get_analysis_version(sessionInfo()) status_fingerprint <- sha1( list( - file_fingerprint, status, parameter, index, - version@AnalysisVersion$fingerprint, version@AnalysisVersion, - version@RPackage, version@AnalysisVersionRPackage, parent_status + file_fingerprint, + status, + parameter, + index, + version@AnalysisVersion$fingerprint, + version@AnalysisVersion, + version@RPackage, + version@AnalysisVersionRPackage, + parent_status ), digits = 6L ) @@ -114,16 +154,23 @@ setMethod( RPackage = version@RPackage, AnalysisVersionRPackage = version@AnalysisVersionRPackage, AnalysisMetadata = data.frame( - result_datasource_id = result_datasource_id, scheme_id = scheme_id, + result_datasource_id = result_datasource_id, + scheme_id = scheme_id, 
species_group_id = species_group_id, - location_group_id = location_group_id, model_type = model_type, - formula = formula, first_imported_year = first_imported_year, - last_imported_year = last_imported_year, duration = duration, - last_analysed_year = last_analysed_year, analysis_date = analysis_date, - seed = seed, status = status, + location_group_id = location_group_id, + model_type = model_type, + formula = formula, + first_imported_year = first_imported_year, + last_imported_year = last_imported_year, + duration = duration, + last_analysed_year = last_analysed_year, + analysis_date = analysis_date, + seed = seed, + status = status, analysis_version = version@AnalysisVersion$fingerprint, file_fingerprint = file_fingerprint, - status_fingerprint = status_fingerprint, stringsAsFactors = FALSE + status_fingerprint = status_fingerprint, + stringsAsFactors = FALSE ), AnalysisFormula = list(as.formula(formula)), AnalysisRelation = parent_status, diff --git a/R/n2k_composite_class.R b/R/n2k_composite_class.R index 9a34ea5a..dfdd560f 100644 --- a/R/n2k_composite_class.R +++ b/R/n2k_composite_class.R @@ -29,7 +29,8 @@ setValidity( "n2kComposite", function(object) { assert_that( - nrow(object@AnalysisRelation) > 0, msg = "'AnalysisRelation' not defined" + nrow(object@AnalysisRelation) > 0, + msg = "'AnalysisRelation' not defined" ) assert_that( noNA(object@AnalysisRelation$parent_analysis), @@ -42,14 +43,17 @@ setValidity( object@AnalysisMetadata$scheme_id, object@AnalysisMetadata$species_group_id, object@AnalysisMetadata$location_group_id, - object@AnalysisMetadata$model_type, object@AnalysisMetadata$formula, + object@AnalysisMetadata$model_type, + object@AnalysisMetadata$formula, object@AnalysisMetadata$first_imported_year, object@AnalysisMetadata$last_imported_year, object@AnalysisMetadata$duration, object@AnalysisMetadata$last_analysed_year, format(object@AnalysisMetadata$analysis_date, tz = "UTC"), - object@AnalysisMetadata$seed, object@AnalysisRelation$parent_analysis, - formals(object@Extractor), as.character(body(object@Extractor)) + object@AnalysisMetadata$seed, + object@AnalysisRelation$parent_analysis, + formals(object@Extractor), + as.character(body(object@Extractor)) ) ) assert_that( @@ -59,9 +63,14 @@ setValidity( status_fingerprint <- sha1( list( object@AnalysisMetadata$file_fingerprint, - object@AnalysisMetadata$status, object@Parameter, object@Index, - object@AnalysisMetadata$analysis_version, object@AnalysisVersion, - object@RPackage, object@AnalysisVersionRPackage, object@AnalysisRelation + object@AnalysisMetadata$status, + object@Parameter, + object@Index, + object@AnalysisMetadata$analysis_version, + object@AnalysisVersion, + object@RPackage, + object@AnalysisVersionRPackage, + object@AnalysisRelation ), digits = 6L ) diff --git a/R/n2k_contrast_class.R b/R/n2k_contrast_class.R index 60d36fe8..9ae385f9 100644 --- a/R/n2k_contrast_class.R +++ b/R/n2k_contrast_class.R @@ -14,16 +14,22 @@ setClass( ), prototype = prototype( Contrast = data.frame( - fingerprint = character(0), description = character(0), - analysis = character(0), stringsAsFactors = FALSE + fingerprint = character(0), + description = character(0), + analysis = character(0), + stringsAsFactors = FALSE ), ContrastCoefficient = data.frame( - contrast = character(0), parameter = character(0), - coefficient = numeric(0), stringsAsFactors = FALSE + contrast = character(0), + parameter = character(0), + coefficient = numeric(0), + stringsAsFactors = FALSE ), ContrastEstimate = data.frame( - contrast = character(0), 
estimate = numeric(0), - lower_confidence_limit = numeric(0), upper_confidence_limit = numeric(0), + contrast = character(0), + estimate = numeric(0), + lower_confidence_limit = numeric(0), + upper_confidence_limit = numeric(0), stringsAsFactors = FALSE ) ) @@ -47,35 +53,45 @@ setValidity( check_dataframe_variable( df = object@ContrastEstimate, variable = c( - "contrast", "estimate", "lower_confidence_limit", + "contrast", + "estimate", + "lower_confidence_limit", "upper_confidence_limit" ), name = "ContrastEstimate" ) - if (!all( - na.omit(object@ContrastCoefficient$contrast) %in% - object@Contrast$fingerprint - )) { + if ( + !all( + na.omit(object@ContrastCoefficient$contrast) %in% + object@Contrast$fingerprint + ) + ) { stop("Some contrast in 'ConstrastCoefficient' slot not found") } - if (!all( - na.omit(object@ContrastEstimate$contrast) %in% - object@Contrast$fingerprint - )) { + if ( + !all( + na.omit(object@ContrastEstimate$contrast) %in% + object@Contrast$fingerprint + ) + ) { stop("Some contrast in 'ConstrastEstimate' slot not found") } if (anyDuplicated(object@Contrast$fingerprint)) { stop("Duplicated fingerprint in 'Contrast' slot") } - if (anyDuplicated( - object@Contrast[, c("description", "analysis")] - )) { + if ( + anyDuplicated( + object@Contrast[, c("description", "analysis")] + ) + ) { stop("Duplicated rows in 'Contrast' slot") } - if (anyDuplicated( - object@ContrastCoefficient[, c("contrast", "parameter")] - )) { + if ( + anyDuplicated( + object@ContrastCoefficient[, c("contrast", "parameter")] + ) + ) { stop("Duplicated rows in 'ContrastCoefficient' slot") } if (anyDuplicated(object@ContrastEstimate$contrast)) { diff --git a/R/n2k_hurdle_imputed.R b/R/n2k_hurdle_imputed.R index a530d807..ff0c74e2 100644 --- a/R/n2k_hurdle_imputed.R +++ b/R/n2k_hurdle_imputed.R @@ -11,7 +11,9 @@ setGeneric( name = "n2k_hurdle_imputed", def = function( - presence, count, verbose = FALSE + presence, + count, + verbose = FALSE ) { standardGeneric("n2k_hurdle_imputed") # nocov } @@ -30,18 +32,23 @@ setMethod( f = "n2k_hurdle_imputed", signature = signature(presence = "n2kInla"), definition = function( - presence, count, verbose = FALSE + presence, + count, + verbose = FALSE ) { assert_that(inherits(count, "n2kInla"), is.flag(verbose), noNA(verbose)) sprintf( - "%s:%s ", count@AnalysisMetadata$location_group_id, + "%s:%s ", + count@AnalysisMetadata$location_group_id, count@AnalysisMetadata$species_group_id ) |> display(verbose = verbose, linefeed = FALSE) validObject(presence) validObject(count) status <- ifelse( - all(c(status(presence), status(count)) == "converged"), "new", "waiting" + all(c(status(presence), status(count)) == "converged"), + "new", + "waiting" ) assert_that( get_seed(presence) == get_seed(count), @@ -56,7 +63,9 @@ setMethod( grepl("^inla zeroinflated(nbinomial|poisson)0: ", get_model_type(count)), gsub("^inla binomial: ", "", get_model_type(presence)) == gsub( - "^inla zeroinflated(nbinomial|poisson)0: ", "", get_model_type(count) + "^inla zeroinflated(nbinomial|poisson)0: ", + "", + get_model_type(count) ), presence@AnalysisMetadata$first_imported_year == count@AnalysisMetadata$first_imported_year, @@ -71,7 +80,8 @@ setMethod( model_type <- sprintf( "inla binomial + zeroinflated%s0: %s", gsub( - "^inla zeroinflated(nbinomial|poisson)0: .*", "\\1", + "^inla zeroinflated(nbinomial|poisson)0: .*", + "\\1", get_model_type(count) ), gsub("^inla binomial: ", "", get_model_type(presence)) @@ -84,7 +94,8 @@ setMethod( presence@AnalysisMetadata$scheme_id, 
presence@AnalysisMetadata$species_group_id, presence@AnalysisMetadata$location_group_id, - model_type, formula, + model_type, + formula, presence@AnalysisMetadata$first_imported_year, presence@AnalysisMetadata$last_imported_year, presence@AnalysisMetadata$duration, @@ -99,10 +110,12 @@ setMethod( analysis_relation <- data.frame( analysis = file_fingerprint, parent_analysis = c( - get_file_fingerprint(presence), get_file_fingerprint(count) + get_file_fingerprint(presence), + get_file_fingerprint(count) ), parentstatus_fingerprint = c( - get_status_fingerprint(presence), get_status_fingerprint(count) + get_status_fingerprint(presence), + get_status_fingerprint(count) ), parent_status = c(status(presence), status(count)), stringsAsFactors = FALSE @@ -110,14 +123,21 @@ setMethod( version <- get_analysis_version(sessionInfo()) status_fingerprint <- sha1( list( - file_fingerprint, status, version@AnalysisVersion$fingerprint, - version@RPackage, version@AnalysisVersionRPackage, analysis_relation, - presence@RawImputed, count@RawImputed, NULL + file_fingerprint, + status, + version@AnalysisVersion$fingerprint, + version@RPackage, + version@AnalysisVersionRPackage, + analysis_relation, + presence@RawImputed, + count@RawImputed, + NULL ), digits = 6L ) new( - "n2kHurdleImputed", AnalysisVersion = version@AnalysisVersion, + "n2kHurdleImputed", + AnalysisVersion = version@AnalysisVersion, RPackage = version@RPackage, AnalysisVersionRPackage = version@AnalysisVersionRPackage, AnalysisMetadata = data.frame( @@ -125,20 +145,25 @@ setMethod( scheme_id = get_scheme_id(presence), species_group_id = presence@AnalysisMetadata$species_group_id, location_group_id = presence@AnalysisMetadata$location_group_id, - model_type = model_type, formula = formula, + model_type = model_type, + formula = formula, first_imported_year = presence@AnalysisMetadata$first_imported_year, last_imported_year = presence@AnalysisMetadata$last_imported_year, duration = presence@AnalysisMetadata$duration, last_analysed_year = presence@AnalysisMetadata$last_analysed_year, analysis_date = presence@AnalysisMetadata$analysis_date, - seed = presence@AnalysisMetadata$seed, status = status, + seed = presence@AnalysisMetadata$seed, + status = status, analysis_version = version@AnalysisVersion$fingerprint, file_fingerprint = file_fingerprint, - status_fingerprint = status_fingerprint, stringsAsFactors = FALSE + status_fingerprint = status_fingerprint, + stringsAsFactors = FALSE ), AnalysisFormula = list(as.formula(formula)), - AnalysisRelation = analysis_relation, Presence = presence@RawImputed, - Count = count@RawImputed, Hurdle = NULL + AnalysisRelation = analysis_relation, + Presence = presence@RawImputed, + Count = count@RawImputed, + Hurdle = NULL ) } ) diff --git a/R/n2k_hurdle_imputed_class.R b/R/n2k_hurdle_imputed_class.R index 5ff80a00..32c8ae45 100644 --- a/R/n2k_hurdle_imputed_class.R +++ b/R/n2k_hurdle_imputed_class.R @@ -14,7 +14,8 @@ setClass( "n2kHurdleImputed", representation = representation( - Presence = "maybeRawImputed", Count = "maybeRawImputed", + Presence = "maybeRawImputed", + Count = "maybeRawImputed", Hurdle = "maybeAggregatedImputed" ), contains = "n2kModel" @@ -37,13 +38,15 @@ setValidity( object@AnalysisMetadata$scheme_id, object@AnalysisMetadata$species_group_id, object@AnalysisMetadata$location_group_id, - object@AnalysisMetadata$model_type, object@AnalysisMetadata$formula, + object@AnalysisMetadata$model_type, + object@AnalysisMetadata$formula, object@AnalysisMetadata$first_imported_year, 
object@AnalysisMetadata$last_imported_year, object@AnalysisMetadata$duration, object@AnalysisMetadata$last_analysed_year, format(object@AnalysisMetadata$analysis_date, tz = "UTC"), - object@AnalysisMetadata$seed, object@AnalysisRelation$parent_analysis + object@AnalysisMetadata$seed, + object@AnalysisRelation$parent_analysis ), environment = FALSE ) @@ -56,9 +59,14 @@ setValidity( status_fingerprint <- sha1( list( object@AnalysisMetadata$file_fingerprint, - object@AnalysisMetadata$status, object@AnalysisVersion, object@RPackage, - object@AnalysisVersionRPackage, object@AnalysisRelation, - object@Presence, object@Count, object@Hurdle + object@AnalysisMetadata$status, + object@AnalysisVersion, + object@RPackage, + object@AnalysisVersionRPackage, + object@AnalysisRelation, + object@Presence, + object@Count, + object@Hurdle ), digits = 6L ) diff --git a/R/n2k_import.R b/R/n2k_import.R index 0134a6ff..15caf3cd 100644 --- a/R/n2k_import.R +++ b/R/n2k_import.R @@ -71,10 +71,18 @@ setMethod( file_fingerprint <- sha1( list( dots$result_datasource_id, - dots$scheme_id, dots$species_group_id, dots$location_group_id, - dots$model_type, dots$formula, dots$first_imported_year, - dots$last_imported_year, dots$duration, dots$last_analysed_year, - format(dots$analysis_date, tz = "UTC"), dots$seed, character(0) + dots$scheme_id, + dots$species_group_id, + dots$location_group_id, + dots$model_type, + dots$formula, + dots$first_imported_year, + dots$last_imported_year, + dots$duration, + dots$last_analysed_year, + format(dots$analysis_date, tz = "UTC"), + dots$seed, + character(0) ), environment = FALSE ) @@ -82,9 +90,13 @@ setMethod( version <- get_analysis_version(sessionInfo()) status_fingerprint <- sha1( list( - file_fingerprint, dots$status, version@AnalysisVersion$fingerprint, - version@AnalysisVersion, version@RPackage, - version@AnalysisVersionRPackage, dots$dataset + file_fingerprint, + dots$status, + version@AnalysisVersion$fingerprint, + version@AnalysisVersion, + version@RPackage, + version@AnalysisVersionRPackage, + dots$dataset ), digits = 6L ) diff --git a/R/n2k_import_class.R b/R/n2k_import_class.R index 60095acc..6a7c859c 100644 --- a/R/n2k_import_class.R +++ b/R/n2k_import_class.R @@ -32,7 +32,8 @@ setValidity( object@AnalysisMetadata$scheme_id, object@AnalysisMetadata$species_group_id, object@AnalysisMetadata$location_group_id, - object@AnalysisMetadata$model_type, object@AnalysisMetadata$formula, + object@AnalysisMetadata$model_type, + object@AnalysisMetadata$formula, object@AnalysisMetadata$first_imported_year, object@AnalysisMetadata$last_imported_year, object@AnalysisMetadata$duration, @@ -53,8 +54,10 @@ setValidity( list( object@AnalysisMetadata$file_fingerprint, object@AnalysisMetadata$status, - object@AnalysisMetadata$analysis_version, object@AnalysisVersion, - object@RPackage, object@AnalysisVersionRPackage, + object@AnalysisMetadata$analysis_version, + object@AnalysisVersion, + object@RPackage, + object@AnalysisVersionRPackage, object@Dataset ), digits = 6L diff --git a/R/n2k_inla.R b/R/n2k_inla.R index 9e3c28d9..1a57e810 100644 --- a/R/n2k_inla.R +++ b/R/n2k_inla.R @@ -10,7 +10,9 @@ setGeneric( name = "n2k_inla", def = function( - data, ..., model_fit + data, + ..., + model_fit ) { standardGeneric("n2k_inla") # nocov } @@ -46,12 +48,32 @@ setMethod( f = "n2k_inla", signature = signature(data = "data.frame"), definition = function( - data, status = "new", result_datasource_id, scheme_id, family = "poisson", - formula, species_group_id, location_group_id, model_type, - 
first_imported_year, last_imported_year, duration, last_analysed_year, - analysis_date, lin_comb = NULL, minimum = "", imputation_size, - parent = character(0), seed, replicate_name = list(), control = list(), - parent_status = "converged", parent_statusfingerprint, extra, ..., model_fit + data, + status = "new", + result_datasource_id, + scheme_id, + family = "poisson", + formula, + species_group_id, + location_group_id, + model_type, + first_imported_year, + last_imported_year, + duration, + last_analysed_year, + analysis_date, + lin_comb = NULL, + minimum = "", + imputation_size, + parent = character(0), + seed, + replicate_name = list(), + control = list(), + parent_status = "converged", + parent_statusfingerprint, + extra, + ..., + model_fit ) { assert_that(is.string(status)) assert_that(is.string(minimum)) @@ -67,9 +89,13 @@ setMethod( imputation_size <- as.integer(imputation_size) } assert_that( - is.string(result_datasource_id), is.string(scheme_id), - is.string(species_group_id), is.string(location_group_id), - is.string(model_type), is.string(formula), is.count(first_imported_year), + is.string(result_datasource_id), + is.string(scheme_id), + is.string(species_group_id), + is.string(location_group_id), + is.string(model_type), + is.string(formula), + is.count(first_imported_year), is.count(last_imported_year) ) first_imported_year <- as.integer(first_imported_year) @@ -87,7 +113,8 @@ setMethod( last_analysed_year <- as.integer(last_analysed_year) assert_that(is.time(analysis_date)) assert_that( - is.null(lin_comb) || inherits(lin_comb, "list") || + is.null(lin_comb) || + inherits(lin_comb, "list") || (inherits(lin_comb, "matrix") && length(dim(lin_comb) == 2)), msg = "lin_comb must be either a list or a matrix" ) @@ -99,20 +126,28 @@ setMethod( assert_that(is.character(family), length(family) >= 1) assert_that(is.list(control)) control$control.compute$dic <- ifelse( - is.null(control$control.compute$dic), TRUE, control$control.compute$dic + is.null(control$control.compute$dic), + TRUE, + control$control.compute$dic ) control$control.compute$waic <- ifelse( - is.null(control$control.compute$waic), TRUE, control$control.compute$waic + is.null(control$control.compute$waic), + TRUE, + control$control.compute$waic ) control$control.compute$cpo <- ifelse( - is.null(control$control.compute$cpo), TRUE, control$control.compute$cpo + is.null(control$control.compute$cpo), + TRUE, + control$control.compute$cpo ) control$control.compute$config <- ifelse( - is.null(control$control.compute$config), TRUE, + is.null(control$control.compute$config), + TRUE, control$control.compute$config ) control$control.predictor$compute <- ifelse( - is.null(control$control.predictor$compute), TRUE, + is.null(control$control.predictor$compute), + TRUE, control$control.predictor$compute ) if (is.null(control$control.predictor$link)) { @@ -120,7 +155,8 @@ setMethod( } control$control.fixed$prec.intercept <- ifelse( is.null(control$control.fixed$prec.intercept), - 1, control$control.fixed$prec.intercept + 1, + control$control.fixed$prec.intercept ) if (missing(extra)) { extra <- data[0, ] @@ -128,18 +164,36 @@ setMethod( file_fingerprint <- sha1( list( - data, result_datasource_id, scheme_id, species_group_id, - location_group_id, family, model_type, formula, first_imported_year, - last_imported_year, duration, last_analysed_year, - format(analysis_date, tz = "UTC"), seed, parent, replicate_name, - lin_comb, imputation_size, minimum, control, extra + data, + result_datasource_id, + scheme_id, + species_group_id, + 
location_group_id, + family, + model_type, + formula, + first_imported_year, + last_imported_year, + duration, + last_analysed_year, + format(analysis_date, tz = "UTC"), + seed, + parent, + replicate_name, + lin_comb, + imputation_size, + minimum, + control, + extra ) ) if (length(parent) == 0) { analysis_relation <- data.frame( - analysis = character(0), parent_analysis = character(0), - parentstatus_fingerprint = character(0), parent_status = character(0), + analysis = character(0), + parent_analysis = character(0), + parentstatus_fingerprint = character(0), + parent_status = character(0), stringsAsFactors = FALSE ) } else { @@ -151,36 +205,52 @@ setMethod( assert_that(is.string(parent_statusfingerprint)) } analysis_relation <- data.frame( - analysis = file_fingerprint, parent_analysis = parent, + analysis = file_fingerprint, + parent_analysis = parent, parentstatus_fingerprint = parent_statusfingerprint, - parent_status = parent_status, stringsAsFactors = FALSE + parent_status = parent_status, + stringsAsFactors = FALSE ) } version <- get_analysis_version(sessionInfo()) status_fingerprint <- sha1( list( - file_fingerprint, status, NULL, version@AnalysisVersion$fingerprint, - version@AnalysisVersion, version@RPackage, - version@AnalysisVersionRPackage, analysis_relation, NULL + file_fingerprint, + status, + NULL, + version@AnalysisVersion$fingerprint, + version@AnalysisVersion, + version@RPackage, + version@AnalysisVersionRPackage, + analysis_relation, + NULL ), digits = 6L ) new( "n2kInla", - AnalysisVersion = version@AnalysisVersion, RPackage = version@RPackage, + AnalysisVersion = version@AnalysisVersion, + RPackage = version@RPackage, AnalysisVersionRPackage = version@AnalysisVersionRPackage, AnalysisMetadata = data.frame( - result_datasource_id = result_datasource_id, scheme_id = scheme_id, + result_datasource_id = result_datasource_id, + scheme_id = scheme_id, species_group_id = species_group_id, - location_group_id = location_group_id, model_type = model_type, - formula = formula, first_imported_year = first_imported_year, - last_imported_year = last_imported_year, duration = duration, - last_analysed_year = last_analysed_year, analysis_date = analysis_date, - seed = seed, status = status, + location_group_id = location_group_id, + model_type = model_type, + formula = formula, + first_imported_year = first_imported_year, + last_imported_year = last_imported_year, + duration = duration, + last_analysed_year = last_analysed_year, + analysis_date = analysis_date, + seed = seed, + status = status, analysis_version = version@AnalysisVersion$fingerprint, file_fingerprint = file_fingerprint, - status_fingerprint = status_fingerprint, stringsAsFactors = FALSE + status_fingerprint = status_fingerprint, + stringsAsFactors = FALSE ), AnalysisFormula = list(as.formula(formula)), AnalysisRelation = analysis_relation, @@ -212,7 +282,11 @@ setMethod( f = "n2k_inla", signature = signature(data = "n2kInla", model_fit = "inla"), definition = function( - data, status, raw_imputed = NULL, ..., model_fit + data, + status, + raw_imputed = NULL, + ..., + model_fit ) { assert_that(is.string(status)) data@Model <- model_fit @@ -226,10 +300,15 @@ setMethod( data@RawImputed <- raw_imputed data@AnalysisMetadata$status_fingerprint <- sha1( list( - data@AnalysisMetadata$file_fingerprint, data@AnalysisMetadata$status, - data@Model, data@AnalysisMetadata$analysis_version, - data@AnalysisVersion, data@RPackage, data@AnalysisVersionRPackage, - data@AnalysisRelation, data@RawImputed + 
data@AnalysisMetadata$file_fingerprint, + data@AnalysisMetadata$status, + data@Model, + data@AnalysisMetadata$analysis_version, + data@AnalysisVersion, + data@RPackage, + data@AnalysisVersionRPackage, + data@AnalysisRelation, + data@RawImputed ), digits = 6L ) diff --git a/R/n2k_inla_class.R b/R/n2k_inla_class.R index af5fd0ca..8dbc88d9 100644 --- a/R/n2k_inla_class.R +++ b/R/n2k_inla_class.R @@ -32,10 +32,15 @@ setClassUnion("maybeRawImputed", c("rawImputed", "aggregatedImputed", "NULL")) setClass( "n2kInla", representation = representation( - Data = "data.frame", Extra = "data.frame", - LinearCombination = "maybeMatrix", ReplicateName = "list", - Model = "maybeInla", Family = "character", Control = "list", - ImputationSize = "integer", Minimum = "character", + Data = "data.frame", + Extra = "data.frame", + LinearCombination = "maybeMatrix", + ReplicateName = "list", + Model = "maybeInla", + Family = "character", + Control = "list", + ImputationSize = "integer", + Minimum = "character", RawImputed = "maybeRawImputed" ), contains = "n2kModel" @@ -56,19 +61,22 @@ setValidity( assert_that(object@ImputationSize >= 0, msg = "negative ImputationSize") c( all.vars(object@AnalysisFormula[[1]]), - "observation_id", "datafield_id" + "observation_id", + "datafield_id" ) %>% walk( - ~assert_that( + ~ assert_that( has_name(object@Data, .x), msg = sprintf("Missing variable `%s` in Data slot", .x) ) ) assert_that( - noNA(object@Data$observation_id), msg = "observation_id cannot be NA" + noNA(object@Data$observation_id), + msg = "observation_id cannot be NA" ) assert_that( - noNA(object@Data$datafield_id), msg = "datafield_id cannot be NA" + noNA(object@Data$datafield_id), + msg = "datafield_id cannot be NA" ) assert_that( @@ -118,24 +126,34 @@ setValidity( } assert_that(is.list(object@Control), msg = "Control must be a list") assert_that( - !has_name(object@Control, "formula"), !has_name(object@Control, "family"), - !has_name(object@Control, "data"), !has_name(object@Control, "lincomb") + !has_name(object@Control, "formula"), + !has_name(object@Control, "family"), + !has_name(object@Control, "data"), + !has_name(object@Control, "lincomb") ) file_fingerprint <- sha1( list( - object@Data, object@AnalysisMetadata$result_datasource_id, + object@Data, + object@AnalysisMetadata$result_datasource_id, object@AnalysisMetadata$scheme_id, object@AnalysisMetadata$species_group_id, - object@AnalysisMetadata$location_group_id, object@Family, - object@AnalysisMetadata$model_type, object@AnalysisMetadata$formula, + object@AnalysisMetadata$location_group_id, + object@Family, + object@AnalysisMetadata$model_type, + object@AnalysisMetadata$formula, object@AnalysisMetadata$first_imported_year, object@AnalysisMetadata$last_imported_year, object@AnalysisMetadata$duration, object@AnalysisMetadata$last_analysed_year, format(object@AnalysisMetadata$analysis_date, tz = "UTC"), - object@AnalysisMetadata$seed, object@AnalysisRelation$parent_analysis, - object@ReplicateName, object@LinearCombination, object@ImputationSize, - object@Minimum, object@Control, object@Extra + object@AnalysisMetadata$seed, + object@AnalysisRelation$parent_analysis, + object@ReplicateName, + object@LinearCombination, + object@ImputationSize, + object@Minimum, + object@Control, + object@Extra ) ) assert_that( @@ -146,10 +164,14 @@ setValidity( status_fingerprint <- sha1( list( object@AnalysisMetadata$file_fingerprint, - object@AnalysisMetadata$status, object@Model, - object@AnalysisMetadata$analysis_version, object@AnalysisVersion, - object@RPackage, 
object@AnalysisVersionRPackage, - object@AnalysisRelation, object@RawImputed + object@AnalysisMetadata$status, + object@Model, + object@AnalysisMetadata$analysis_version, + object@AnalysisVersion, + object@RPackage, + object@AnalysisVersionRPackage, + object@AnalysisRelation, + object@RawImputed ), digits = 6L ) diff --git a/R/n2k_inla_comparison.R b/R/n2k_inla_comparison.R index a928e98b..6fac3be5 100644 --- a/R/n2k_inla_comparison.R +++ b/R/n2k_inla_comparison.R @@ -12,7 +12,8 @@ setGeneric( name = "n2k_inla_comparison", def = function( - parent_status, ... + parent_status, + ... ) { standardGeneric("n2k_inla_comparison") # nocov } @@ -53,10 +54,21 @@ setMethod( f = "n2k_inla_comparison", signature = signature(parent_status = "data.frame"), definition = function( - parent_status, status = "waiting", result_datasource_id, scheme_id, - formula, species_group_id, location_group_id, model_type, - first_imported_year, last_imported_year, duration, last_analysed_year, - analysis_date, ..., seed + parent_status, + status = "waiting", + result_datasource_id, + scheme_id, + formula, + species_group_id, + location_group_id, + model_type, + first_imported_year, + last_imported_year, + duration, + last_analysed_year, + analysis_date, + ..., + seed ) { assert_that(is.string(status)) if (missing(seed)) { @@ -99,10 +111,17 @@ setMethod( file_fingerprint <- sha1( list( result_datasource_id, - scheme_id, species_group_id, location_group_id, - model_type, formula, first_imported_year, - last_imported_year, duration, last_analysed_year, - format(analysis_date, tz = "UTC"), seed, + scheme_id, + species_group_id, + location_group_id, + model_type, + formula, + first_imported_year, + last_imported_year, + duration, + last_analysed_year, + format(analysis_date, tz = "UTC"), + seed, parent_status$parent_analysis ) ) @@ -110,15 +129,22 @@ setMethod( parent_status$analysis <- file_fingerprint parent_status <- parent_status %>% select( - "analysis", "parent_analysis", "parentstatus_fingerprint", + "analysis", + "parent_analysis", + "parentstatus_fingerprint", "parent_status" ) version <- get_analysis_version(sessionInfo()) status_fingerprint <- sha1( list( - file_fingerprint, status, NULL, - version@AnalysisVersion$fingerprint, version@AnalysisVersion, - version@RPackage, version@AnalysisVersionRPackage, parent_status + file_fingerprint, + status, + NULL, + version@AnalysisVersion$fingerprint, + version@AnalysisVersion, + version@RPackage, + version@AnalysisVersionRPackage, + parent_status ), digits = 6L ) diff --git a/R/n2k_inla_comparison_class.R b/R/n2k_inla_comparison_class.R index dbe817d0..f09780e6 100644 --- a/R/n2k_inla_comparison_class.R +++ b/R/n2k_inla_comparison_class.R @@ -37,7 +37,8 @@ setValidity( object@AnalysisMetadata$scheme_id, object@AnalysisMetadata$species_group_id, object@AnalysisMetadata$location_group_id, - object@AnalysisMetadata$model_type, object@AnalysisMetadata$formula, + object@AnalysisMetadata$model_type, + object@AnalysisMetadata$formula, object@AnalysisMetadata$first_imported_year, object@AnalysisMetadata$last_imported_year, object@AnalysisMetadata$duration, @@ -54,9 +55,13 @@ setValidity( status_fingerprint <- sha1( list( object@AnalysisMetadata$file_fingerprint, - object@AnalysisMetadata$status, object@WAIC, - object@AnalysisMetadata$analysis_version, object@AnalysisVersion, - object@RPackage, object@AnalysisVersionRPackage, object@AnalysisRelation + object@AnalysisMetadata$status, + object@WAIC, + object@AnalysisMetadata$analysis_version, + object@AnalysisVersion, + 
object@RPackage, + object@AnalysisVersionRPackage, + object@AnalysisRelation ), digits = 6L ) diff --git a/R/n2k_manifest.R b/R/n2k_manifest.R index 30fd3493..7431d6ae 100644 --- a/R/n2k_manifest.R +++ b/R/n2k_manifest.R @@ -30,8 +30,10 @@ setMethod( manifest ) { assert_that( - has_name(manifest, "fingerprint"), has_name(manifest, "parent"), - is.character(manifest$fingerprint), is.character(manifest$parent), + has_name(manifest, "fingerprint"), + has_name(manifest, "parent"), + is.character(manifest$fingerprint), + is.character(manifest$parent), noNA(manifest$fingerprint) ) diff --git a/R/n2k_manifest_class.R b/R/n2k_manifest_class.R index 0a879059..dc61cb4c 100644 --- a/R/n2k_manifest_class.R +++ b/R/n2k_manifest_class.R @@ -9,16 +9,19 @@ setClass( "n2kManifest", representation = representation( - Manifest = "data.frame", Fingerprint = "character" + Manifest = "data.frame", + Fingerprint = "character" ), prototype = prototype( Manifest = data.frame( - fingerprint = character(0), parent = character(0), + fingerprint = character(0), + parent = character(0), stringsAsFactors = FALSE ), Fingerprint = sha1( data.frame( - fingerprint = character(0), parent = character(0), + fingerprint = character(0), + parent = character(0), stringsAsFactors = FALSE ) ) @@ -38,7 +41,9 @@ setValidity( msg = "Fingerprint is not a string (a length one character vector)." ) check_dataframe_variable( - df = object@Manifest, force_na = TRUE, name = "Manifest", + df = object@Manifest, + force_na = TRUE, + name = "Manifest", variable = list(fingerprint = "character", parent = "character") ) assert_that( @@ -47,14 +52,16 @@ setValidity( ) assert_that( - any(is.na(object@Manifest$parent)), msg = "All rows have parents" + any(is.na(object@Manifest$parent)), + msg = "All rows have parents" ) self_link <- object@Manifest %>% filter(.data$fingerprint == .data$parent) %>% nrow() assert_that( - self_link == 0, msg = "Self references between parent and fingerprint" + self_link == 0, + msg = "Self references between parent and fingerprint" ) if (!all(is.na(object@Manifest$parent))) { @@ -85,7 +92,8 @@ setValidity( } assert_that( - sha1(object@Manifest) == object@Fingerprint, msg = "wrong fingerprint" + sha1(object@Manifest) == object@Fingerprint, + msg = "wrong fingerprint" ) return(TRUE) } diff --git a/R/n2k_model_class.R b/R/n2k_model_class.R index de13c83a..130a4096 100644 --- a/R/n2k_model_class.R +++ b/R/n2k_model_class.R @@ -23,6 +23,7 @@ setValidity( if (nrow(object@AnalysisMetadata) != 1) { stop("The 'AnalysisMetadata' slot must contain exactly one row") } + # fmt: skip stopifnot( "Some Analysis in 'AnalysisRelation' slot don't match file_fingerprint" = nrow(object@AnalysisRelation) == 0 || diff --git a/R/n2k_model_imputed.R b/R/n2k_model_imputed.R index 816c822c..6490948e 100644 --- a/R/n2k_model_imputed.R +++ b/R/n2k_model_imputed.R @@ -53,20 +53,27 @@ setMethod( dots$seed <- sample(.Machine$integer.max, 1) } assert_that( - is.count(dots$seed), is.string(dots$result_datasource_id), - is.string(dots$scheme_id), is.string(dots$species_group_id), - is.string(dots$location_group_id), is.string(dots$model_type), - is.string(dots$formula), is.count(dots$first_imported_year), - is.count(dots$last_imported_year), is.time(dots$analysis_date) + is.count(dots$seed), + is.string(dots$result_datasource_id), + is.string(dots$scheme_id), + is.string(dots$species_group_id), + is.string(dots$location_group_id), + is.string(dots$model_type), + is.string(dots$formula), + is.count(dots$first_imported_year), + 
is.count(dots$last_imported_year), + is.time(dots$analysis_date) ) dots$seed <- as.integer(dots$seed) dots$first_imported_year <- as.integer(dots$first_imported_year) dots$last_imported_year <- as.integer(dots$last_imported_year) dots$duration <- coalesce( - dots$duration, dots$last_imported_year - dots$first_imported_year + 1L + dots$duration, + dots$last_imported_year - dots$first_imported_year + 1L ) dots$last_analysed_year <- coalesce( - dots$last_analysed_year, dots$last_imported_year + dots$last_analysed_year, + dots$last_imported_year ) dots$filter <- c(dots$filter, list()) dots$mutate <- c(dots$mutate, list()) @@ -75,12 +82,18 @@ setMethod( dots$extractor_args <- c(dots$extractor_args, list()) dots$package <- c(dots$package, character(0)) assert_that( - is.count(dots$duration), is.count(dots$last_analysed_year), - is.list(dots$filter), is.list(dots$mutate), is.list(dots$model_args), + is.count(dots$duration), + is.count(dots$last_analysed_year), + is.list(dots$filter), + is.list(dots$mutate), + is.list(dots$model_args), is.function(dots$model_fun) || is.string(dots$model_fun), - is.function(dots$extractor), is.list(dots$prepare_model_args), - length(dots$prepare_model_args) <= 1, is.list(dots$extractor_args), - is.character(dots$package), is.string(dots$parent) + is.function(dots$extractor), + is.list(dots$prepare_model_args), + length(dots$prepare_model_args) <= 1, + is.list(dots$extractor_args), + is.character(dots$package), + is.string(dots$parent) ) dots$duration <- as.integer(dots$duration) dots$last_analysed_year <- as.integer(dots$last_analysed_year) @@ -91,13 +104,26 @@ setMethod( file_fingerprint <- sha1( list( dots$result_datasource_id, - dots$scheme_id, dots$species_group_id, dots$location_group_id, - dots$model_type, dots$formula, dots$first_imported_year, - dots$last_imported_year, dots$duration, dots$last_analysed_year, + dots$scheme_id, + dots$species_group_id, + dots$location_group_id, + dots$model_type, + dots$formula, + dots$first_imported_year, + dots$last_imported_year, + dots$duration, + dots$last_analysed_year, format(dots$analysis_date, tz = "UTC"), - dots$seed, dots$parent, dots$model_fun, dots$filter, - dots$mutate, dots$model_args, dots$prepare_model_args, dots$extractor, - dots$extractor_args, dots$package + dots$seed, + dots$parent, + dots$model_fun, + dots$filter, + dots$mutate, + dots$model_args, + dots$prepare_model_args, + dots$extractor, + dots$extractor_args, + dots$package ), environment = FALSE ) @@ -115,16 +141,24 @@ setMethod( ) |> do.call(what = stopifnot) analysis_relation <- data.frame( - analysis = file_fingerprint, parent_analysis = dots$parent, + analysis = file_fingerprint, + parent_analysis = dots$parent, parentstatus_fingerprint = dots$parent_statusfingerprint, - parent_status = dots$parent_status, stringsAsFactors = FALSE + parent_status = dots$parent_status, + stringsAsFactors = FALSE ) version <- get_analysis_version(sessionInfo()) status_fingerprint <- sha1( list( - file_fingerprint, dots$status, version@AnalysisVersion$fingerprint, - version@AnalysisVersion, version@RPackage, - version@AnalysisVersionRPackage, analysis_relation, NULL, NULL + file_fingerprint, + dots$status, + version@AnalysisVersion$fingerprint, + version@AnalysisVersion, + version@RPackage, + version@AnalysisVersionRPackage, + analysis_relation, + NULL, + NULL ), digits = 6L ) diff --git a/R/n2k_model_imputed_class.R b/R/n2k_model_imputed_class.R index c5c5d905..bffdaf00 100644 --- a/R/n2k_model_imputed_class.R +++ b/R/n2k_model_imputed_class.R @@ -57,8 +57,10 @@ 
setValidity( "n2kModelImputed", function(object) { stopifnot( - "Function must be either a function or a string" = - inherits(object@Function, "function") || + "Function must be either a function or a string" = inherits( + object@Function, + "function" + ) || (is.string(object@Function) && noNA(object@Function)) ) file_fingerprint <- sha1( @@ -67,7 +69,8 @@ setValidity( object@AnalysisMetadata$scheme_id, object@AnalysisMetadata$species_group_id, object@AnalysisMetadata$location_group_id, - object@AnalysisMetadata$model_type, object@AnalysisMetadata$formula, + object@AnalysisMetadata$model_type, + object@AnalysisMetadata$formula, object@AnalysisMetadata$first_imported_year, object@AnalysisMetadata$last_imported_year, object@AnalysisMetadata$duration, @@ -75,29 +78,39 @@ setValidity( format(object@AnalysisMetadata$analysis_date, tz = "UTC"), object@AnalysisMetadata$seed, object@AnalysisRelation$parent_analysis, - object@Function, object@Filter, object@Mutate, object@ModelArgs, - object@PrepareModelArgs, object@Extractor, object@ExtractorArgs, + object@Function, + object@Filter, + object@Mutate, + object@ModelArgs, + object@PrepareModelArgs, + object@Extractor, + object@ExtractorArgs, object@Package ), environment = FALSE ) stopifnot( - "Corrupt file_fingerprint" = - object@AnalysisMetadata$file_fingerprint == file_fingerprint + "Corrupt file_fingerprint" = object@AnalysisMetadata$file_fingerprint == + file_fingerprint ) status_fingerprint <- sha1( list( object@AnalysisMetadata$file_fingerprint, object@AnalysisMetadata$status, - object@AnalysisMetadata$analysis_version, object@AnalysisVersion, - object@RPackage, object@AnalysisVersionRPackage, - object@AnalysisRelation, object@AggregatedImputed, object@Results + object@AnalysisMetadata$analysis_version, + object@AnalysisVersion, + object@RPackage, + object@AnalysisVersionRPackage, + object@AnalysisRelation, + object@AggregatedImputed, + object@Results ), digits = 6L ) + # fmt: skip stopifnot( "Corrupt status_fingerprint" = object@AnalysisMetadata$status_fingerprint == status_fingerprint diff --git a/R/n2k_parameter_class.R b/R/n2k_parameter_class.R index e11b0a12..5445ee44 100644 --- a/R/n2k_parameter_class.R +++ b/R/n2k_parameter_class.R @@ -13,12 +13,17 @@ setClass( ), prototype = prototype( Parameter = data.frame( - description = character(0), parent = character(0), - fingerprint = character(0), stringsAsFactors = FALSE + description = character(0), + parent = character(0), + fingerprint = character(0), + stringsAsFactors = FALSE ), ParameterEstimate = data.frame( - analysis = character(0), parameter = character(0), estimate = numeric(0), - lower_confidence_limit = numeric(0), upper_confidence_limit = numeric(0), + analysis = character(0), + parameter = character(0), + estimate = numeric(0), + lower_confidence_limit = numeric(0), + upper_confidence_limit = numeric(0), stringsAsFactors = FALSE ) ) @@ -33,7 +38,8 @@ setValidity( function(object) { parameter <- object@Parameter assert_that( - has_name(parameter, "description"), has_name(parameter, "parent"), + has_name(parameter, "description"), + has_name(parameter, "parent"), has_name(parameter, "fingerprint") ) @@ -46,9 +52,11 @@ setValidity( has_name(parameter_estimate, "upper_confidence_limit") ) - if (!all( - na.omit(object@Parameter$parent) %in% object@Parameter$fingerprint - )) { + if ( + !all( + na.omit(object@Parameter$parent) %in% object@Parameter$fingerprint + ) + ) { stop("Some parent in 'Parameter' slot not found") } all( @@ -78,17 +86,20 @@ setValidity( test <- 
object@ParameterEstimate %>% summarise( test_lcl = all( - .data$estimate - .data$lower_confidence_limit >= 0, na.rm = TRUE + .data$estimate - .data$lower_confidence_limit >= 0, + na.rm = TRUE ), test_ucl = all( - .data$upper_confidence_limit - .data$estimate >= 0, na.rm = TRUE + .data$upper_confidence_limit - .data$estimate >= 0, + na.rm = TRUE ) ) list(test$test_lcl, test$test_ucl) |> setNames( paste( "All estimates in 'ParameterEstimate' slot must be", - c("greather", "less"), "than the", + c("greather", "less"), + "than the", c("`lower_confidence_limit`", "`upper_confidence_limit`") ) ) |> diff --git a/R/n2k_spde.R b/R/n2k_spde.R index ca0cb8d7..36373e06 100644 --- a/R/n2k_spde.R +++ b/R/n2k_spde.R @@ -10,7 +10,9 @@ setGeneric( name = "n2k_spde", def = function( - data, ..., model_fit + data, + ..., + model_fit ) { standardGeneric("n2k_spde") # nocov } @@ -33,12 +35,33 @@ setMethod( f = "n2k_spde", signature = signature(data = "data.frame"), definition = function( - data, status = "new", result_datasource_id, scheme_id, family = "poisson", - formula, species_group_id, location_group_id, model_type, spde, - first_imported_year, last_imported_year, duration, last_analysed_year, - analysis_date, lin_comb = NULL, minimum = "", imputation_size, - parent = character(0), seed, replicate_name = list(), control = list(), - parent_status = "converged", parent_statusfingerprint, extra, ..., model_fit + data, + status = "new", + result_datasource_id, + scheme_id, + family = "poisson", + formula, + species_group_id, + location_group_id, + model_type, + spde, + first_imported_year, + last_imported_year, + duration, + last_analysed_year, + analysis_date, + lin_comb = NULL, + minimum = "", + imputation_size, + parent = character(0), + seed, + replicate_name = list(), + control = list(), + parent_status = "converged", + parent_statusfingerprint, + extra, + ..., + model_fit ) { assert_that(is.string(status)) assert_that(is.string(minimum)) @@ -54,9 +77,13 @@ setMethod( imputation_size <- as.integer(imputation_size) } assert_that( - is.string(result_datasource_id), is.string(scheme_id), - is.string(species_group_id), is.string(location_group_id), - is.string(model_type), is.string(formula), is.count(first_imported_year), + is.string(result_datasource_id), + is.string(scheme_id), + is.string(species_group_id), + is.string(location_group_id), + is.string(model_type), + is.string(formula), + is.count(first_imported_year), is.count(last_imported_year) ) first_imported_year <- as.integer(first_imported_year) @@ -74,7 +101,8 @@ setMethod( last_analysed_year <- as.integer(last_analysed_year) assert_that(is.time(analysis_date)) assert_that( - is.null(lin_comb) || inherits(lin_comb, "list") || + is.null(lin_comb) || + inherits(lin_comb, "list") || (inherits(lin_comb, "matrix") && length(dim(lin_comb) == 2)), msg = "lin_comb must be either a list or a matrix" ) @@ -86,20 +114,28 @@ setMethod( assert_that(is.character(family), length(family) >= 1) assert_that(is.list(control)) control$control.compute$dic <- ifelse( - is.null(control$control.compute$dic), TRUE, control$control.compute$dic + is.null(control$control.compute$dic), + TRUE, + control$control.compute$dic ) control$control.compute$waic <- ifelse( - is.null(control$control.compute$waic), TRUE, control$control.compute$waic + is.null(control$control.compute$waic), + TRUE, + control$control.compute$waic ) control$control.compute$cpo <- ifelse( - is.null(control$control.compute$cpo), TRUE, control$control.compute$cpo + is.null(control$control.compute$cpo), + 
TRUE, + control$control.compute$cpo ) control$control.compute$config <- ifelse( - is.null(control$control.compute$config), TRUE, + is.null(control$control.compute$config), + TRUE, control$control.compute$config ) control$control.predictor$compute <- ifelse( - is.null(control$control.predictor$compute), TRUE, + is.null(control$control.predictor$compute), + TRUE, control$control.predictor$compute ) if (is.null(control$control.predictor$link)) { @@ -107,7 +143,8 @@ setMethod( } control$control.fixed$prec.intercept <- ifelse( is.null(control$control.fixed$prec.intercept), - 1, control$control.fixed$prec.intercept + 1, + control$control.fixed$prec.intercept ) if (missing(extra)) { extra <- data[0, ] @@ -115,18 +152,36 @@ setMethod( file_fingerprint <- sha1( list( - data, result_datasource_id, scheme_id, species_group_id, - location_group_id, family, model_type, formula, first_imported_year, - last_imported_year, duration, last_analysed_year, - format(analysis_date, tz = "UTC"), seed, parent, replicate_name, - lin_comb, imputation_size, minimum, control, extra + data, + result_datasource_id, + scheme_id, + species_group_id, + location_group_id, + family, + model_type, + formula, + first_imported_year, + last_imported_year, + duration, + last_analysed_year, + format(analysis_date, tz = "UTC"), + seed, + parent, + replicate_name, + lin_comb, + imputation_size, + minimum, + control, + extra ) ) if (length(parent) == 0) { analysis_relation <- data.frame( - analysis = character(0), parent_analysis = character(0), - parentstatus_fingerprint = character(0), parent_status = character(0), + analysis = character(0), + parent_analysis = character(0), + parentstatus_fingerprint = character(0), + parent_status = character(0), stringsAsFactors = FALSE ) } else { @@ -138,42 +193,66 @@ setMethod( assert_that(is.string(parent_statusfingerprint)) } analysis_relation <- data.frame( - analysis = file_fingerprint, parent_analysis = parent, + analysis = file_fingerprint, + parent_analysis = parent, parentstatus_fingerprint = parent_statusfingerprint, - parent_status = parent_status, stringsAsFactors = FALSE + parent_status = parent_status, + stringsAsFactors = FALSE ) } version <- get_analysis_version(sessionInfo()) status_fingerprint <- sha1( list( - file_fingerprint, status, NULL, version@AnalysisVersion$fingerprint, - version@AnalysisVersion, version@RPackage, - version@AnalysisVersionRPackage, analysis_relation, NULL + file_fingerprint, + status, + NULL, + version@AnalysisVersion$fingerprint, + version@AnalysisVersion, + version@RPackage, + version@AnalysisVersionRPackage, + analysis_relation, + NULL ), digits = 6L ) new( "n2kSpde", - AnalysisVersion = version@AnalysisVersion, RPackage = version@RPackage, + AnalysisVersion = version@AnalysisVersion, + RPackage = version@RPackage, AnalysisVersionRPackage = version@AnalysisVersionRPackage, AnalysisMetadata = data.frame( - result_datasource_id = result_datasource_id, scheme_id = scheme_id, + result_datasource_id = result_datasource_id, + scheme_id = scheme_id, species_group_id = species_group_id, - location_group_id = location_group_id, model_type = model_type, - formula = formula, first_imported_year = first_imported_year, - last_imported_year = last_imported_year, duration = duration, - last_analysed_year = last_analysed_year, analysis_date = analysis_date, - seed = seed, status = status, + location_group_id = location_group_id, + model_type = model_type, + formula = formula, + first_imported_year = first_imported_year, + last_imported_year = last_imported_year, + 
duration = duration, + last_analysed_year = last_analysed_year, + analysis_date = analysis_date, + seed = seed, + status = status, analysis_version = version@AnalysisVersion$fingerprint, file_fingerprint = file_fingerprint, - status_fingerprint = status_fingerprint, stringsAsFactors = FALSE + status_fingerprint = status_fingerprint, + stringsAsFactors = FALSE ), - AnalysisFormula = list(as.formula(formula)), LinearCombination = lin_comb, - AnalysisRelation = analysis_relation, Data = data, Model = NULL, - ReplicateName = replicate_name, Family = family, Control = control, - ImputationSize = imputation_size, Minimum = minimum, RawImputed = NULL, - Extra = extra, Spde = spde + AnalysisFormula = list(as.formula(formula)), + LinearCombination = lin_comb, + AnalysisRelation = analysis_relation, + Data = data, + Model = NULL, + ReplicateName = replicate_name, + Family = family, + Control = control, + ImputationSize = imputation_size, + Minimum = minimum, + RawImputed = NULL, + Extra = extra, + Spde = spde ) } ) @@ -192,7 +271,11 @@ setMethod( f = "n2k_spde", signature = signature(data = "n2kSpde", model_fit = "inla"), definition = function( - data, status, raw_imputed = NULL, ..., model_fit + data, + status, + raw_imputed = NULL, + ..., + model_fit ) { assert_that(is.string(status)) data@Model <- model_fit @@ -206,10 +289,15 @@ setMethod( data@RawImputed <- raw_imputed data@AnalysisMetadata$status_fingerprint <- sha1( list( - data@AnalysisMetadata$file_fingerprint, data@AnalysisMetadata$status, - data@Model, data@AnalysisMetadata$analysis_version, - data@AnalysisVersion, data@RPackage, data@AnalysisVersionRPackage, - data@AnalysisRelation, data@RawImputed + data@AnalysisMetadata$file_fingerprint, + data@AnalysisMetadata$status, + data@Model, + data@AnalysisMetadata$analysis_version, + data@AnalysisVersion, + data@RPackage, + data@AnalysisVersionRPackage, + data@AnalysisRelation, + data@RawImputed ), digits = 6L ) diff --git a/R/parent_status.R b/R/parent_status.R index 5fed58b9..6d575897 100644 --- a/R/parent_status.R +++ b/R/parent_status.R @@ -51,8 +51,11 @@ setReplaceMethod( x@parent_status <- value x@status_fingerprint <- sha1( list( - x@AnalysisMetadata$file_fingerprint, x@AnalysisMetadata$status, - x@Parameter, x@Index, x@AnalysisMetadata$analysis_version, + x@AnalysisMetadata$file_fingerprint, + x@AnalysisMetadata$status, + x@Parameter, + x@Index, + x@AnalysisMetadata$analysis_version, x@AnalysisRelation ), digits = 6L diff --git a/R/read_manifest.R b/R/read_manifest.R index 1d1b9e3b..5cfe0165 100644 --- a/R/read_manifest.R +++ b/R/read_manifest.R @@ -38,10 +38,14 @@ setMethod( ) available <- list.files( - dir, pattern = "\\.manifest$", full.names = TRUE, ignore.case = TRUE + dir, + pattern = "\\.manifest$", + full.names = TRUE, + ignore.case = TRUE ) assert_that( - length(available) > 0, msg = paste0("No manifest files in '", dir, "'") + length(available) > 0, + msg = paste0("No manifest files in '", dir, "'") ) if (missing(hash)) { @@ -49,13 +53,19 @@ setMethod( rownames_to_column("filename") |> slice_max(.data$mtime, n = 1) -> latest read.table( - latest$filename, header = TRUE, sep = "\t", colClasses = "character", + latest$filename, + header = TRUE, + sep = "\t", + colClasses = "character", as.is = TRUE ) |> n2k_manifest() -> manifest stopifnot( - "fingerprint doesn't match" = - paste0(manifest@Fingerprint, ".manifest") == basename(latest$filename) + "fingerprint doesn't match" = paste0( + manifest@Fingerprint, + ".manifest" + ) == + basename(latest$filename) ) return(manifest) } @@ 
-72,7 +82,10 @@ setMethod( msg = paste0("Multiple manifests found starting with '", hash, "'") ) read.table( - available[selection], header = TRUE, sep = "\t", colClasses = "character", + available[selection], + header = TRUE, + sep = "\t", + colClasses = "character", as.is = TRUE ) |> n2k_manifest() -> manifest @@ -95,20 +108,28 @@ setMethod( if (missing(hash)) { assert_that(is.string(project)) available <- get_bucket( - base, prefix = paste(project, "manifest", sep = "/"), max = Inf + base, + prefix = paste(project, "manifest", sep = "/"), + max = Inf ) stopifnot("No manifest files in this project" = length(available) > 0) map_chr(available, "LastModified") |> as.POSIXct(format = "%Y-%m-%dT%H:%M:%OS") |> which.max() -> latest s3read_using( - read.table, header = TRUE, sep = "\t", colClasses = "character", - as.is = TRUE, object = available[[latest]] + read.table, + header = TRUE, + sep = "\t", + colClasses = "character", + as.is = TRUE, + object = available[[latest]] ) |> n2k_manifest() -> manifest stopifnot( - "fingerprint doesn't match" = - paste0(manifest@Fingerprint, ".manifest") == + "fingerprint doesn't match" = paste0( + manifest@Fingerprint, + ".manifest" + ) == basename(available[[latest]][["Key"]]) ) return(manifest) @@ -121,7 +142,8 @@ setMethod( gsub(pattern = "\\.manifest", replacement = "", x = _) -> hash } available <- get_bucket( - bucket = base, prefix = paste(project, "manifest", hash, sep = "/") + bucket = base, + prefix = paste(project, "manifest", hash, sep = "/") ) assert_that( length(available) > 0, @@ -132,8 +154,12 @@ setMethod( msg = sprintf("Multiple manifests found starting with '%s'", hash) ) s3read_using( - read.table, header = TRUE, sep = "\t", colClasses = "character", - as.is = TRUE, object = available[[1]] + read.table, + header = TRUE, + sep = "\t", + colClasses = "character", + as.is = TRUE, + object = available[[1]] ) |> n2k_manifest() -> manifest stopifnot("fingerprint doesn't match" = manifest@Fingerprint == hash) diff --git a/R/read_result.R b/R/read_result.R index 7a136846..19f7aa9c 100644 --- a/R/read_result.R +++ b/R/read_result.R @@ -23,12 +23,18 @@ setMethod( signature = signature(base = "character"), definition = function(x, base, project) { assert_that( - is.string(x), noNA(x), grepl("^[[:xdigit:]]{40}", x), is.dir(base), - is.string(project), noNA(project), is.dir(path(base, project, "results")) + is.string(x), + noNA(x), + grepl("^[[:xdigit:]]{40}", x), + is.dir(base), + is.string(project), + noNA(project), + is.dir(path(base, project, "results")) ) filename <- path(base, project, "results", x, ext = "rds") assert_that( - file_exists(filename), msg = sprintf("`%s` does not exists", filename) + file_exists(filename), + msg = sprintf("`%s` does not exists", filename) ) readRDS(filename) } @@ -44,7 +50,10 @@ setMethod( signature = signature(base = "s3_bucket"), definition = function(x, base, project) { assert_that( - is.string(x), noNA(x), grepl("^[[:xdigit:]]{40}$", x), is.string(project), + is.string(x), + noNA(x), + grepl("^[[:xdigit:]]{40}$", x), + is.string(project), noNA(project) ) prefix <- file.path(project, "results", paste0(x, ".rds"), fsep = "/") diff --git a/R/result_estimate_n2k_result.R b/R/result_estimate_n2k_result.R index 9536fe5a..df4d0d48 100644 --- a/R/result_estimate_n2k_result.R +++ b/R/result_estimate_n2k_result.R @@ -9,18 +9,25 @@ setMethod( validObject(x) slot(x, "ParameterEstimate") |> inner_join( - slot(x, "Parameter"), by = c("parameter" = "fingerprint") + slot(x, "Parameter"), + by = c("parameter" = "fingerprint") 
) -> estimates while (any(!is.na(estimates$parent))) { estimates |> left_join( - slot(x, "Parameter"), by = c("parent" = "fingerprint") + slot(x, "Parameter"), + by = c("parent" = "fingerprint") ) |> transmute( - .data$analysis, .data$estimate, .data$lower_confidence_limit, - .data$upper_confidence_limit, parent = .data$parent.y, + .data$analysis, + .data$estimate, + .data$lower_confidence_limit, + .data$upper_confidence_limit, + parent = .data$parent.y, description = paste( - .data$description.y, .data$description.x, sep = ":" + .data$description.y, + .data$description.x, + sep = ":" ) ) -> estimates } diff --git a/R/select_factor_count_strictly_positive.R b/R/select_factor_count_strictly_positive.R index 8040f595..ca87dde2 100644 --- a/R/select_factor_count_strictly_positive.R +++ b/R/select_factor_count_strictly_positive.R @@ -19,22 +19,23 @@ #' Year = rep(c(1, 1, 1, 1, 2, 2), 2) #' ) #' # Select the locations with at least 3 prescenses -#' select_factor_count_strictly_positive( +#' select_factor_count_non_zero( #' observation, #' variable = "LocationID", #' threshold = 3 #' ) #' # Select those locations in which the species is present in at least 2 years -#' select_factor_count_strictly_positive( +#' select_factor_count_non_zero( #' observation, variable = c("LocationID", "Year"), threshold = 2 #' ) #' # Select those years in which the species is present in at least 2 locations -#' select_factor_count_strictly_positive( +#' select_factor_count_non_zero( #' observation, variable = c("LocationID", "Year"), #' threshold = 2, #' dimension = 2 #' ) -select_factor_count_strictly_positive <- function(# nolint +select_factor_count_non_zero <- function( + # nolint observation, variable, threshold, diff --git a/R/select_factor_treshold.R b/R/select_factor_treshold.R index 7df5bfbf..32d7ba7e 100644 --- a/R/select_factor_treshold.R +++ b/R/select_factor_treshold.R @@ -34,7 +34,8 @@ select_factor_threshold <- function(observation, variable, threshold) { setNames( paste( "The number of observations much be at least twice the number of", - "levels in", variable + "levels in", + variable ) ) |> do.call(what = stopifnot) diff --git a/R/select_observed_range.R b/R/select_observed_range.R index 879236ec..415d4e58 100644 --- a/R/select_observed_range.R +++ b/R/select_observed_range.R @@ -23,7 +23,8 @@ select_observed_range <- function(observation, variable) { ) if (any(is.na(observation[, variable]))) { warning( - variable, " contains missing values. Corresponding rows are removed." + variable, + " contains missing values. Corresponding rows are removed." 
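The rename of select_factor_count_strictly_positive() to select_factor_count_non_zero() above is a breaking change to the exported API. A minimal backward-compatibility shim is sketched below; this wrapper is illustrative only and is not something the patch itself adds.

# hypothetical soft-deprecation alias for the old name (not part of this patch)
select_factor_count_strictly_positive <- function(...) {
  .Deprecated("select_factor_count_non_zero")
  select_factor_count_non_zero(...)
}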
) } diff --git a/R/session_package.R b/R/session_package.R index 0d888abb..202d6fc2 100644 --- a/R/session_package.R +++ b/R/session_package.R @@ -18,7 +18,9 @@ package_version <- function(x) { if (has_name(x, "Repository")) { return( data.frame( - description = x$Package, version = x$Version, origin = "CRAN", + description = x$Package, + version = x$Version, + origin = "CRAN", stringsAsFactors = FALSE ) ) @@ -29,14 +31,19 @@ package_version <- function(x) { description = x$Package, version = x$Version, origin = sprintf( - "Github: %s/%s@%s", x$GithubUsername, x$GithubRepo, x$GithubSHA1 + "Github: %s/%s@%s", + x$GithubUsername, + x$GithubRepo, + x$GithubSHA1 ), stringsAsFactors = FALSE ) ) } data.frame( - description = x$Package, version = x$Version, origin = "local", + description = x$Package, + version = x$Version, + origin = "local", stringsAsFactors = FALSE ) } diff --git a/R/sha1.R b/R/sha1.R index daffd36f..b6a9ef51 100644 --- a/R/sha1.R +++ b/R/sha1.R @@ -12,7 +12,8 @@ sha1.inla <- function(x, digits = 14L, zapsmall = 7L, ...) { FixedEffects = list(x$summary.fixed), Hyper = list(x$summary.hyperpar) ) - attr(parameter, "digest::sha1") <- list( # nolint: object_name_linter. + attr(parameter, "digest::sha1") <- list( + # nolint: object_name_linter. class = class(x), digits = as.integer(digits), zapsmall = as.integer(zapsmall), @@ -31,7 +32,8 @@ sha1.rawImputed <- function(x, digits = 14L, zapsmall = 7L, ...) { Response = x@Response, Imputation = x@Imputation ) - attr(parameter, "digest::sha1") <- list( # nolint: object_name_linter. + attr(parameter, "digest::sha1") <- list( + # nolint: object_name_linter. class = class(x), digits = as.integer(digits), zapsmall = as.integer(zapsmall), @@ -49,7 +51,8 @@ sha1.aggregatedImputed <- function(x, digits = 14L, zapsmall = 7L, ...) { Covariate = x@Covariate, Imputation = x@Imputation ) - attr(parameter, "digest::sha1") <- list( # nolint: object_name_linter. + attr(parameter, "digest::sha1") <- list( + # nolint: object_name_linter. class = class(x), digits = as.integer(digits), zapsmall = as.integer(zapsmall), diff --git a/R/spde.R b/R/spde.R index ce8738b6..cd53a640 100644 --- a/R/spde.R +++ b/R/spde.R @@ -9,10 +9,19 @@ #' @importFrom methods new spde <- function(coordinates, range, sigma) { assert_that( - inherits(coordinates, "data.frame"), is.numeric(range), is.numeric(sigma), - noNA(coordinates), noNA(range), noNA(sigma), ncol(coordinates) == 2, - length(range) == 2, length(sigma) == 2, all(range > 0), all(sigma > 0), - range[2] < 1, sigma[2] < 1 + inherits(coordinates, "data.frame"), + is.numeric(range), + is.numeric(sigma), + noNA(coordinates), + noNA(range), + noNA(sigma), + ncol(coordinates) == 2, + length(range) == 2, + length(sigma) == 2, + all(range > 0), + all(sigma > 0), + range[2] < 1, + sigma[2] < 1 ) new("Spde", Coordinates = coordinates, Range = range, Sigma = sigma) } diff --git a/R/spde2matern.R b/R/spde2matern.R index 7946eb58..d3b90a3e 100644 --- a/R/spde2matern.R +++ b/R/spde2matern.R @@ -20,11 +20,14 @@ setMethod( signature = signature(object = "Spde"), definition = function(object) { stopifnot( - "INLA package required but not installed." = - requireNamespace("INLA", quietly = TRUE) + "INLA package required but not installed." 
= requireNamespace( + "INLA", + quietly = TRUE + ) ) INLA::inla.spde2.pcmatern( - mesh = spde2mesh(object), prior.range = object@Range, + mesh = spde2mesh(object), + prior.range = object@Range, prior.sigma = object@Sigma ) } diff --git a/R/spde2mesh.R b/R/spde2mesh.R index 91f4136e..c472a7a4 100644 --- a/R/spde2mesh.R +++ b/R/spde2mesh.R @@ -20,10 +20,14 @@ setMethod( signature = signature(object = "Spde"), definition = function(object) { stopifnot( - "fmesher package required but not installed." = - requireNamespace("fmesher", quietly = TRUE), - "sf package required but not installed." = - requireNamespace("sf", quietly = TRUE) + "fmesher package required but not installed." = requireNamespace( + "fmesher", + quietly = TRUE + ), + "sf package required but not installed." = requireNamespace( + "sf", + quietly = TRUE + ) ) max_dist <- object@Range[1] object@Coordinates |> @@ -32,7 +36,8 @@ setMethod( sf::st_union() |> sf::st_simplify(dTolerance = max_dist / 10) -> region fmesher::fm_mesh_2d_inla( - boundary = region, max.edge = c(max_dist / 3, max_dist * 2), + boundary = region, + max.edge = c(max_dist / 3, max_dist * 2), cutoff = max_dist / 10 ) } diff --git a/R/spde_class.R b/R/spde_class.R index 68fe52a7..aaefa84f 100644 --- a/R/spde_class.R +++ b/R/spde_class.R @@ -15,7 +15,9 @@ setClass( "Spde", representation = representation( - Coordinates = "data.frame", Range = "numeric", Sigma = "numeric" + Coordinates = "data.frame", + Range = "numeric", + Sigma = "numeric" ) ) @@ -25,10 +27,16 @@ setValidity( "Spde", function(object) { assert_that( - noNA(object@Coordinates), noNA(object@Range), noNA(object@Sigma), - ncol(object@Coordinates) == 2, length(object@Range) == 2, - length(object@Sigma) == 2, all(object@Range > 0), all(object@Sigma > 0), - object@Range[2] < 1, object@Sigma[2] < 1 + noNA(object@Coordinates), + noNA(object@Range), + noNA(object@Sigma), + ncol(object@Coordinates) == 2, + length(object@Range) == 2, + length(object@Sigma) == 2, + all(object@Range > 0), + all(object@Sigma > 0), + object@Range[2] < 1, + object@Sigma[2] < 1 ) return(TRUE) } diff --git a/R/status.R b/R/status.R index da6244bc..edf6b848 100644 --- a/R/status.R +++ b/R/status.R @@ -98,9 +98,15 @@ setReplaceMethod( x@AnalysisMetadata$status <- value x@AnalysisMetadata$status_fingerprint <- sha1( list( - x@AnalysisMetadata$file_fingerprint, x@AnalysisMetadata$status, x@Model, - x@AnalysisMetadata$analysis_version, x@AnalysisVersion, x@RPackage, - x@AnalysisVersionRPackage, x@AnalysisRelation, x@RawImputed + x@AnalysisMetadata$file_fingerprint, + x@AnalysisMetadata$status, + x@Model, + x@AnalysisMetadata$analysis_version, + x@AnalysisVersion, + x@RPackage, + x@AnalysisVersionRPackage, + x@AnalysisRelation, + x@RawImputed ), digits = 6L ) @@ -120,9 +126,14 @@ setReplaceMethod( x@AnalysisMetadata$status <- value x@AnalysisMetadata$status_fingerprint <- sha1( list( - x@AnalysisMetadata$file_fingerprint, x@AnalysisMetadata$status, - x@Parameter, x@Index, x@AnalysisMetadata$analysis_version, - x@AnalysisVersion, x@RPackage, x@AnalysisVersionRPackage, + x@AnalysisMetadata$file_fingerprint, + x@AnalysisMetadata$status, + x@Parameter, + x@Index, + x@AnalysisMetadata$analysis_version, + x@AnalysisVersion, + x@RPackage, + x@AnalysisVersionRPackage, x@AnalysisRelation ), digits = 6L @@ -143,9 +154,13 @@ setReplaceMethod( x@AnalysisMetadata$status <- value x@AnalysisMetadata$status_fingerprint <- sha1( list( - x@AnalysisMetadata$file_fingerprint, x@AnalysisMetadata$status, - x@WAIC, x@AnalysisMetadata$analysis_version, - 
x@AnalysisVersion, x@RPackage, x@AnalysisVersionRPackage, + x@AnalysisMetadata$file_fingerprint, + x@AnalysisMetadata$status, + x@WAIC, + x@AnalysisMetadata$analysis_version, + x@AnalysisVersion, + x@RPackage, + x@AnalysisVersionRPackage, x@AnalysisRelation ), digits = 6L @@ -166,9 +181,14 @@ setReplaceMethod( x@AnalysisMetadata$status <- value x@AnalysisMetadata$status_fingerprint <- sha1( list( - x@AnalysisMetadata$file_fingerprint, x@AnalysisMetadata$status, - x@AnalysisMetadata$analysis_version, x@AnalysisVersion, x@RPackage, - x@AnalysisVersionRPackage, x@AnalysisRelation, x@RawImputed, + x@AnalysisMetadata$file_fingerprint, + x@AnalysisMetadata$status, + x@AnalysisMetadata$analysis_version, + x@AnalysisVersion, + x@RPackage, + x@AnalysisVersionRPackage, + x@AnalysisRelation, + x@RawImputed, x@AggregatedImputed ), digits = 6L @@ -189,9 +209,14 @@ setReplaceMethod( x@AnalysisMetadata$status <- value x@AnalysisMetadata$status_fingerprint <- sha1( list( - x@AnalysisMetadata$file_fingerprint, x@AnalysisMetadata$status, - x@AnalysisMetadata$analysis_version, x@AnalysisVersion, x@RPackage, - x@AnalysisVersionRPackage, x@AnalysisRelation, x@AggregatedImputed, + x@AnalysisMetadata$file_fingerprint, + x@AnalysisMetadata$status, + x@AnalysisMetadata$analysis_version, + x@AnalysisVersion, + x@RPackage, + x@AnalysisVersionRPackage, + x@AnalysisRelation, + x@AggregatedImputed, x@Results ), digits = 6L @@ -213,9 +238,15 @@ setReplaceMethod( x@AnalysisMetadata$status <- value x@AnalysisMetadata$status_fingerprint <- sha1( list( - get_file_fingerprint(x), x@AnalysisMetadata$status, - x@AnalysisVersion$fingerprint, x@AnalysisVersion, x@RPackage, - x@AnalysisVersionRPackage, x@AnalysisRelation, x@Presence, x@Count, + get_file_fingerprint(x), + x@AnalysisMetadata$status, + x@AnalysisVersion$fingerprint, + x@AnalysisVersion, + x@RPackage, + x@AnalysisVersionRPackage, + x@AnalysisRelation, + x@Presence, + x@Count, x@Hurdle ), digits = 6L diff --git a/R/store_manifest.R b/R/store_manifest.R index 6dbcbdcb..3942538a 100644 --- a/R/store_manifest.R +++ b/R/store_manifest.R @@ -24,8 +24,12 @@ setMethod( signature = signature(base = "character"), definition = function(x, base, project, overwrite = FALSE) { assert_that( - inherits(x, "n2kManifest"), is.string(base), file_test("-d", base), - is.string(project), is.flag(overwrite), noNA(overwrite) + inherits(x, "n2kManifest"), + is.string(base), + file_test("-d", base), + is.string(project), + is.flag(overwrite), + noNA(overwrite) ) validObject(x, complete = TRUE) @@ -37,7 +41,9 @@ setMethod( #test if file exists fingerprint <- get_file_fingerprint(x) filename <- list.files( - dir, pattern = sprintf("%s.manifest$", fingerprint), full.names = TRUE + dir, + pattern = sprintf("%s.manifest$", fingerprint), + full.names = TRUE ) if (!overwrite && length(filename) > 0) { return(normalizePath(filename, winslash = "/")) @@ -58,18 +64,27 @@ setMethod( signature = signature(base = "s3_bucket"), definition = function(x, base, project, overwrite = FALSE) { assert_that( - inherits(x, "n2kManifest"), is.string(project), noNA(project), - is.flag(overwrite), noNA(overwrite) + inherits(x, "n2kManifest"), + is.string(project), + noNA(project), + is.flag(overwrite), + noNA(overwrite) ) validObject(x, complete = TRUE) filename <- file.path( - fsep = "/", project, "manifest", + fsep = "/", + project, + "manifest", sprintf("%s.manifest", get_file_fingerprint(x)) ) write_s3_fun( - object = x@Manifest, bucket = base, key = filename, overwrite = overwrite, - row.names = FALSE, sep = "\t" 
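Each status<- method above recomputes the status fingerprint as a sha1() digest over the file fingerprint, the new status value, and the slots that define the analysis state, always with digits = 6L. A minimal sketch of that pattern follows, assuming only the digest package; the helper name and its arguments are illustrative and not part of the package.

library(digest)

# hash the immutable file fingerprint, the new status, and any extra slots
# that determine the state of the analysis (model, seed, relations, ...)
status_fingerprint <- function(file_fingerprint, status, extra = list()) {
  sha1(c(list(file_fingerprint, status), extra), digits = 6L)
}

status_fingerprint("0a1b2c", "converged", extra = list(seed = 12345L))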
+ object = x@Manifest, + bucket = base, + key = filename, + overwrite = overwrite, + row.names = FALSE, + sep = "\t" ) } ) diff --git a/R/store_manifest_yaml.R b/R/store_manifest_yaml.R index afc16a16..82fa2971 100644 --- a/R/store_manifest_yaml.R +++ b/R/store_manifest_yaml.R @@ -25,18 +25,32 @@ setMethod( f = "store_manifest_yaml", signature = signature(base = "s3_bucket"), definition = function( - x, base, project, docker, dependencies, overwrite = FALSE + x, + base, + project, + docker, + dependencies, + overwrite = FALSE ) { assert_that( - is.string(docker), is.character(dependencies), noNA(dependencies), - noNA(docker), is.flag(overwrite), noNA(overwrite) + is.string(docker), + is.character(dependencies), + noNA(dependencies), + noNA(docker), + is.flag(overwrite), + noNA(overwrite) ) stored <- store_manifest( - x = x, base = base, project = project, overwrite = overwrite + x = x, + base = base, + project = project, + overwrite = overwrite ) list( - github = dependencies, docker = docker, bucket = attr(base, "Name"), + github = dependencies, + docker = docker, + bucket = attr(base, "Name"), project = project, hash = basename(stored) |> gsub(pattern = "\\.manifest", replacement = "") @@ -44,7 +58,10 @@ setMethod( filename <- sprintf("%s/yaml/%s.yaml", project, sha1(yaml)) write_s3_fun( - object = yaml, bucket = base, key = filename, overwrite = overwrite, + object = yaml, + bucket = base, + key = filename, + overwrite = overwrite, fun = write_yaml ) } @@ -60,18 +77,32 @@ setMethod( f = "store_manifest_yaml", signature = signature(base = "character"), definition = function( - x, base, project, docker, dependencies, overwrite = FALSE + x, + base, + project, + docker, + dependencies, + overwrite = FALSE ) { assert_that( - is.dir(base), is.string(docker), is.character(dependencies), - is.flag(overwrite), noNA(overwrite) + is.dir(base), + is.string(docker), + is.character(dependencies), + is.flag(overwrite), + noNA(overwrite) ) stored <- store_manifest( - x = x, base = base, project = project, overwrite = overwrite + x = x, + base = base, + project = project, + overwrite = overwrite ) list( - github = dependencies, docker = docker, bucket = base, project = project, + github = dependencies, + docker = docker, + bucket = base, + project = project, hash = basename(stored) |> gsub(pattern = "\\.manifest", replacement = "") ) -> yaml diff --git a/R/store_model.R b/R/store_model.R index 89571453..ba782cd2 100644 --- a/R/store_model.R +++ b/R/store_model.R @@ -27,11 +27,17 @@ setMethod( signature = signature(base = "character"), definition = function(x, base, project, overwrite = TRUE, validate = TRUE) { assert_that( - is.flag(overwrite), noNA(overwrite), inherits(x, "n2kModel"), - is.string(base), is.string(project), noNA(project), is.flag(validate) + is.flag(overwrite), + noNA(overwrite), + inherits(x, "n2kModel"), + is.string(base), + is.string(project), + noNA(project), + is.flag(validate) ) assert_that( - dir_exists(base), msg = sprintf("`%s` is not an existing directory", base) + dir_exists(base), + msg = sprintf("`%s` is not an existing directory", base) ) base <- path_abs(base) if (isTRUE(validate)) { @@ -45,7 +51,9 @@ setMethod( base <- path(base, project, part) dir_create(base) current <- dir_ls( - base, recurse = TRUE, type = "file", + base, + recurse = TRUE, + type = "file", regexp = sprintf("%s.rds$", fingerprint) ) if (length(current) > 0) { @@ -75,8 +83,13 @@ setMethod( signature = signature(base = "s3_bucket"), definition = function(x, base, project, overwrite = TRUE, validate = 
TRUE) { assert_that( - inherits(x, "n2kModel"), is.string(project), is.flag(overwrite), - is.flag(validate), noNA(project), noNA(overwrite), noNA(validate) + inherits(x, "n2kModel"), + is.string(project), + is.flag(overwrite), + is.flag(validate), + noNA(project), + noNA(overwrite), + noNA(validate) ) if (isTRUE(validate)) { validObject(x, complete = TRUE) @@ -121,8 +134,10 @@ setMethod( sha1() |> sprintf(fmt = "%2$s/backup/%1$s", project) -> backup copy_object( - from_object = old[1], to_object = backup, - from_bucket = base, to_bucket = base + from_object = old[1], + to_object = backup, + from_bucket = base, + to_bucket = base ) delete_object(old, bucket = base) } @@ -152,8 +167,10 @@ setMethod( # restore the backup because s3saveRDS() failed if (!bucket_ok) { copy_object( - from_object = backup, to_object = old[1], - from_bucket = base, to_bucket = base + from_object = backup, + to_object = old[1], + from_bucket = base, + to_bucket = base ) } # remove the backup diff --git a/R/union.R b/R/union.R index 43613988..f334afe8 100644 --- a/R/union.R +++ b/R/union.R @@ -18,7 +18,8 @@ union <- function(...) { rownames(r_package) <- NULL analysis_version <- sha1(r_package) analysis_version_r_package <- data.frame( - analysis_version = analysis_version, r_package = r_package$fingerprint + analysis_version = analysis_version, + r_package = r_package$fingerprint ) output <- combine( ..., diff --git a/R/write_s3_fun.R b/R/write_s3_fun.R index ccf5c29d..dcb6b198 100644 --- a/R/write_s3_fun.R +++ b/R/write_s3_fun.R @@ -2,7 +2,13 @@ #' @importFrom aws.s3 bucket_exists get_bucket s3write_using #' @importFrom purrr map_chr write_s3_fun <- function( - object, bucket, key, fun = write.table, overwrite = FALSE, opts = NULL, ..., + object, + bucket, + key, + fun = write.table, + overwrite = FALSE, + opts = NULL, + ..., max_attempt = 10 ) { assert_that(is.flag(overwrite), noNA(overwrite), is.count(max_attempt)) @@ -19,7 +25,12 @@ write_s3_fun <- function( repeat { bucket_ok <- tryCatch( s3write_using( - x = object, FUN = fun, bucket = bucket, object = key, opts = opts, ... + x = object, + FUN = fun, + bucket = bucket, + object = key, + opts = opts, + ... 
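write_s3_fun() above wraps s3write_using() in a repeat/tryCatch() loop bounded by max_attempt. The same idea reduced to a self-contained base R helper is sketched below; the retry() function is illustrative and not part of the package.

# attempt fun(...) up to max_attempt times, pausing between attempts,
# and rethrow the last error if every attempt fails
retry <- function(fun, ..., max_attempt = 10, wait = 1) {
  for (attempt in seq_len(max_attempt)) {
    outcome <- tryCatch(fun(...), error = function(err) err)
    if (!inherits(outcome, "error")) {
      return(outcome)
    }
    Sys.sleep(wait)
  }
  stop(outcome)
}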
), error = function(err) { err diff --git a/man/select_factor_count_strictly_positive.Rd b/man/select_factor_count_non_zero.Rd similarity index 84% rename from man/select_factor_count_strictly_positive.Rd rename to man/select_factor_count_non_zero.Rd index b3c6086d..7355c99d 100644 --- a/man/select_factor_count_strictly_positive.Rd +++ b/man/select_factor_count_non_zero.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/select_factor_count_strictly_positive.R -\name{select_factor_count_strictly_positive} -\alias{select_factor_count_strictly_positive} +\name{select_factor_count_non_zero} +\alias{select_factor_count_non_zero} \title{Select data based on the number of presences per category} \usage{ -select_factor_count_strictly_positive( +select_factor_count_non_zero( observation, variable, threshold, @@ -37,17 +37,17 @@ observation <- data.frame( Year = rep(c(1, 1, 1, 1, 2, 2), 2) ) # Select the locations with at least 3 prescenses -select_factor_count_strictly_positive( +select_factor_count_non_zero( observation, variable = "LocationID", threshold = 3 ) # Select those locations in which the species is present in at least 2 years -select_factor_count_strictly_positive( +select_factor_count_non_zero( observation, variable = c("LocationID", "Year"), threshold = 2 ) # Select those years in which the species is present in at least 2 locations -select_factor_count_strictly_positive( +select_factor_count_non_zero( observation, variable = c("LocationID", "Year"), threshold = 2, dimension = 2 diff --git a/tests/testthat/helper_test_data.R b/tests/testthat/helper_test_data.R index 4e684966..a9b1e74b 100644 --- a/tests/testthat/helper_test_data.R +++ b/tests/testthat/helper_test_data.R @@ -23,7 +23,7 @@ test_data <- function(datafield_id = sha1(letters), missing = 0) { mm_fixed <- model.matrix(~ A * (B + C) + C * D, data = dataset) fixed <- runif(ncol(mm_fixed)) - mm_random <- model.matrix(~ 0 + factor(E) : A, data = dataset) + mm_random <- model.matrix(~ 0 + factor(E):A, data = dataset) random <- rnorm(length(levels(dataset$A)) * n_e, sd = sd_random) random <- apply(matrix(random, nrow = n_e), 2, cumsum) random <- as.vector(random) diff --git a/tests/testthat/test_aaa_get_datafield_id.R b/tests/testthat/test_aaa_get_datafield_id.R index d6870d10..b6135d57 100644 --- a/tests/testthat/test_aaa_get_datafield_id.R +++ b/tests/testthat/test_aaa_get_datafield_id.R @@ -5,19 +5,28 @@ test_that("get_datafield_id", { expect_is( get_datafield_id( - table = "test", field = "id", datasource = "database", root = root + table = "test", + field = "id", + datasource = "database", + root = root ), "integer" ) expect_is( get_datafield_id( - table = "test", field = "id", datasource = "database", root = root + table = "test", + field = "id", + datasource = "database", + root = root ), "integer" ) expect_is( get_datafield_id( - table = "test2", field = "id", datasource = "database", root = root + table = "test2", + field = "id", + datasource = "database", + root = root ), "integer" ) diff --git a/tests/testthat/test_aaa_n2k_manifest.R b/tests/testthat/test_aaa_n2k_manifest.R index 841cfff0..45a9c1e2 100644 --- a/tests/testthat/test_aaa_n2k_manifest.R +++ b/tests/testthat/test_aaa_n2k_manifest.R @@ -15,7 +15,9 @@ test_that("n2k_manifest checks the content of Manifest", { new( "n2kManifest", Manifest = data.frame( - fingerprint = 1, parent = "1", stringsAsFactors = FALSE + fingerprint = 1, + parent = "1", + stringsAsFactors = FALSE ) ), "fingerprint: got 'numeric'" @@ -28,7 
+30,9 @@ test_that("n2k_manifest checks the content of Manifest", { test_that("n2k_manifest checks the fingerprint", { manifest <- data.frame( - fingerprint = "1", parent = NA_character_, stringsAsFactors = FALSE + fingerprint = "1", + parent = NA_character_, + stringsAsFactors = FALSE ) expect_error(new("n2kManifest", Manifest = manifest), "wrong fingerprint") expect_error( @@ -46,31 +50,38 @@ test_that("n2k_manifest checks the fingerprint", { }) test_that( - "n2k_manifest checks the correct link between parent and fingerprint", + paste("n2k_manifest checks the correct link between parent and fingerprint"), { manifest <- data.frame( - fingerprint = "1", parent = "2", stringsAsFactors = FALSE + fingerprint = "1", + parent = "2", + stringsAsFactors = FALSE ) expect_error( new("n2kManifest", Manifest = manifest, Fingerprint = sha1(manifest)), "All rows have parents" ) manifest <- data.frame( - fingerprint = c("1", "2"), parent = c(NA, "3"), stringsAsFactors = FALSE + fingerprint = c("1", "2"), + parent = c(NA, "3"), + stringsAsFactors = FALSE ) expect_error( new("n2kManifest", Manifest = manifest, Fingerprint = sha1(manifest)), "Some parent in 'Manifest' slot have no matching fingerprint" ) manifest <- data.frame( - fingerprint = c("1", "2"), parent = c(NA, "2"), stringsAsFactors = FALSE + fingerprint = c("1", "2"), + parent = c(NA, "2"), + stringsAsFactors = FALSE ) expect_error( new("n2kManifest", Manifest = manifest, Fingerprint = sha1(manifest)), "Self references between parent and fingerprint" ) manifest <- data.frame( - fingerprint = c("1", "2", "3"), parent = c(NA, "3", "2"), + fingerprint = c("1", "2", "3"), + parent = c(NA, "3", "2"), stringsAsFactors = FALSE ) expect_error( @@ -79,7 +90,8 @@ test_that( ) manifest <- data.frame( fingerprint = as.character(seq(1, 20)), - parent = as.character(c(NA, 1:19)), stringsAsFactors = FALSE + parent = as.character(c(NA, 1:19)), + stringsAsFactors = FALSE ) expect_error( new("n2kManifest", Manifest = manifest, Fingerprint = sha1(manifest)), @@ -90,14 +102,18 @@ test_that( test_that("n2kManifest generates the object", { manifest <- data.frame( - fingerprint = "1", parent = NA_character_, stringsAsFactors = FALSE + fingerprint = "1", + parent = NA_character_, + stringsAsFactors = FALSE ) expect_is( new("n2kManifest", Manifest = manifest, Fingerprint = sha1(manifest)), "n2kManifest" ) manifest <- data.frame( - fingerprint = c("1", "2"), parent = c(NA, "1"), stringsAsFactors = FALSE + fingerprint = c("1", "2"), + parent = c(NA, "1"), + stringsAsFactors = FALSE ) expect_is( new("n2kManifest", Manifest = manifest, Fingerprint = sha1(manifest)), @@ -124,14 +140,17 @@ test_that("n2k_manifest ignores extra columns", { expect_is( x <- n2k_manifest( data.frame( - fingerprint = "1", parent = NA_character_, junk = 1, + fingerprint = "1", + parent = NA_character_, + junk = 1, stringsAsFactors = FALSE ) ), "n2kManifest" ) expect_identical( - colnames(x@Manifest), c("fingerprint", "parent") + colnames(x@Manifest), + c("fingerprint", "parent") ) }) diff --git a/tests/testthat/test_aaa_select_factor_count_strictly_positive.R b/tests/testthat/test_aaa_select_factor_count_strictly_positive.R index ce991f8a..84079aa6 100644 --- a/tests/testthat/test_aaa_select_factor_count_strictly_positive.R +++ b/tests/testthat/test_aaa_select_factor_count_strictly_positive.R @@ -1,5 +1,5 @@ context("select data based on positive observations") -describe("select_factor_count_strictly_positive", { +describe("select_factor_count_non_zero", { observation <- data.frame( Count = c(4, 
4, 4, 4, 3, 3, 3, 0, 2, 2, 0, 0), LocationID = rep(1:3, each = 4), @@ -12,7 +12,7 @@ describe("select_factor_count_strictly_positive", { it("selects correctly", { expect_that( - select_factor_count_strictly_positive( + select_factor_count_non_zero( observation = observation, variable = "LocationID", threshold = 3, @@ -21,7 +21,7 @@ describe("select_factor_count_strictly_positive", { is_identical_to(subset(observation, LocationID %in% 1:2)) ) expect_that( - select_factor_count_strictly_positive( + select_factor_count_non_zero( observation = observation, variable = c("LocationID", "Year"), threshold = 2, @@ -30,7 +30,7 @@ describe("select_factor_count_strictly_positive", { is_identical_to(subset(observation, LocationID == 2)) ) expect_that( - select_factor_count_strictly_positive( + select_factor_count_non_zero( observation = observation, variable = c("LocationID", "Year"), threshold = 2, @@ -39,7 +39,7 @@ describe("select_factor_count_strictly_positive", { is_identical_to(subset(observation, Year == 1)) ) expect_that( - select_factor_count_strictly_positive( + select_factor_count_non_zero( observation = observation_relative, variable = "Year", threshold = 0.15, @@ -51,7 +51,7 @@ describe("select_factor_count_strictly_positive", { }) it("checks the number of dimensions", { expect_that( - select_factor_count_strictly_positive( + select_factor_count_non_zero( observation = observation, variable = "LocationID", threshold = 3, @@ -60,7 +60,7 @@ describe("select_factor_count_strictly_positive", { throws_error("the dimension can't exceed the number of variables") ) expect_that( - select_factor_count_strictly_positive( + select_factor_count_non_zero( observation = observation, variable = c("LocationID", "Year"), threshold = 3, @@ -72,7 +72,7 @@ describe("select_factor_count_strictly_positive", { }) it("checks the correct class of threshold", { expect_that( - select_factor_count_strictly_positive( + select_factor_count_non_zero( observation = observation, variable = "LocationID", threshold = 0.15, @@ -82,7 +82,7 @@ describe("select_factor_count_strictly_positive", { throws_error("threshold is not a count \\(a single positive integer\\)") ) expect_that( - select_factor_count_strictly_positive( + select_factor_count_non_zero( observation = observation, variable = "LocationID", threshold = 3, diff --git a/tests/testthat/test_aaa_select_factor_treshold.R b/tests/testthat/test_aaa_select_factor_treshold.R index 81c9ef97..2dac3f58 100644 --- a/tests/testthat/test_aaa_select_factor_treshold.R +++ b/tests/testthat/test_aaa_select_factor_treshold.R @@ -31,7 +31,8 @@ describe("select_factor_threshold", { throws_error( paste( "The number of observations much be at least twice the number of", - "levels in", variable + "levels in", + variable ) ) ) diff --git a/tests/testthat/test_aaa_validobject.R b/tests/testthat/test_aaa_validobject.R index 2fefa25a..a1364875 100644 --- a/tests/testthat/test_aaa_validobject.R +++ b/tests/testthat/test_aaa_validobject.R @@ -21,7 +21,10 @@ test_that("n2kanalysis objects", { TRUE } else { paste( - "Unequal x,y lengths: ", length(object@x), ", ", length(object@y), + "Unequal x,y lengths: ", + length(object@x), + ", ", + length(object@y), sep = "" ) } @@ -37,7 +40,9 @@ test_that("n2kanalysis objects", { expect_error(validObject(t2)) temp_dir <- normalizePath( - tempfile("n2kanalysis"), winslash = "/", mustWork = FALSE + tempfile("n2kanalysis"), + winslash = "/", + mustWork = FALSE ) dir.create(temp_dir) dir.create(paste(temp_dir, "sub", sep = "/")) diff --git 
a/tests/testthat/test_aba_n2k_import.R b/tests/testthat/test_aba_n2k_import.R index 60231a79..f045a696 100644 --- a/tests/testthat/test_aba_n2k_import.R +++ b/tests/testthat/test_aba_n2k_import.R @@ -10,16 +10,23 @@ test_that("n2kimport", { last_imported_year <- 10 analysis_date <- Sys.time() dataset <- data.frame( - filename = "filename", fingerprint = "fingerprint", - import_date = Sys.time(), stringsAsFactors = TRUE + filename = "filename", + fingerprint = "fingerprint", + import_date = Sys.time(), + stringsAsFactors = TRUE ) expect_is( junk <- n2k_import( - status = status, result_datasource_id = result_datasource_id, - scheme_id = scheme_id, species_group_id = species_group_id, - location_group_id = location_group_id, model_type = model_type, - formula = formula, first_imported_year = first_imported_year, - last_imported_year = last_imported_year, analysis_date = analysis_date, + status = status, + result_datasource_id = result_datasource_id, + scheme_id = scheme_id, + species_group_id = species_group_id, + location_group_id = location_group_id, + model_type = model_type, + formula = formula, + first_imported_year = first_imported_year, + last_imported_year = last_imported_year, + analysis_date = analysis_date, dataset = dataset ), "n2kImport" diff --git a/tests/testthat/test_aba_n2k_inla.R b/tests/testthat/test_aba_n2k_inla.R index f3b43efb..70466013 100644 --- a/tests/testthat/test_aba_n2k_inla.R +++ b/tests/testthat/test_aba_n2k_inla.R @@ -17,7 +17,7 @@ this_lc <- dataset %>% select("A", "B", "C", "D") %>% filter(.data$C == max(.data$C), .data$D == max(.data$D)) %>% distinct() %>% - model.matrix(object = ~A * (B + C) + C:D) + model.matrix(object = ~ A * (B + C) + C:D) object <- n2k_inla( result_datasource_id = this_result_datasource_id, scheme_id = this_scheme_id, @@ -31,16 +31,22 @@ object <- n2k_inla( data = dataset ) model_object <- INLA::inla( - Count ~ A * (B + C) + C:D + - f(E, model = "rw1", replicate = as.integer(A)) + - f(G, model = "iid"), + Count ~ + A * + (B + C) + + C:D + + f(E, model = "rw1", replicate = as.integer(A)) + + f(G, model = "iid"), data = object@Data, family = "poisson" ) model_truth <- INLA::inla( - Count ~ A * (B + C) + C:D + - f(E, model = "rw1", replicate = as.integer(A)) + - f(G, model = "iid"), + Count ~ + A * + (B + C) + + C:D + + f(E, model = "rw1", replicate = as.integer(A)) + + f(G, model = "iid"), data = dataset, family = "poisson" ) @@ -85,12 +91,17 @@ test_that("n2k_inla() requires a correct status", { ) expect_that( n2k_inla( - data = dataset, result_datasource_id = this_result_datasource_id, - scheme_id = this_scheme_id, species_group_id = this_species_group_id, - location_group_id = this_location_group_id, model_type = this_model_type, - formula = this_formula, first_imported_year = this_first_imported_year, + data = dataset, + result_datasource_id = this_result_datasource_id, + scheme_id = this_scheme_id, + species_group_id = this_species_group_id, + location_group_id = this_location_group_id, + model_type = this_model_type, + formula = this_formula, + first_imported_year = this_first_imported_year, last_imported_year = this_last_imported_year, - analysis_date = this_analysis_date, status = NA_character_ + analysis_date = this_analysis_date, + status = NA_character_ ), throws_error("status must be one of the following") ) @@ -98,10 +109,14 @@ test_that("n2k_inla() requires a correct status", { test_that("n2k_inla() checks the model type", { expect_that( n2k_inla( - data = dataset, result_datasource_id = this_result_datasource_id, - 
scheme_id = this_scheme_id, species_group_id = this_species_group_id, - location_group_id = this_location_group_id, model_type = "junk", - formula = this_formula, first_imported_year = this_first_imported_year, + data = dataset, + result_datasource_id = this_result_datasource_id, + scheme_id = this_scheme_id, + species_group_id = this_species_group_id, + location_group_id = this_location_group_id, + model_type = "junk", + formula = this_formula, + first_imported_year = this_first_imported_year, last_imported_year = this_last_imported_year, analysis_date = this_analysis_date ), @@ -112,12 +127,17 @@ test_that("n2k_inla() sets the correct seed", { this_seed <- 12345L expect_that( n2k_inla( - data = dataset, result_datasource_id = this_result_datasource_id, - scheme_id = this_scheme_id, species_group_id = this_species_group_id, - location_group_id = this_location_group_id, model_type = this_model_type, - formula = this_formula, first_imported_year = this_first_imported_year, + data = dataset, + result_datasource_id = this_result_datasource_id, + scheme_id = this_scheme_id, + species_group_id = this_species_group_id, + location_group_id = this_location_group_id, + model_type = this_model_type, + formula = this_formula, + first_imported_year = this_first_imported_year, last_imported_year = this_last_imported_year, - analysis_date = this_analysis_date, seed = this_seed + analysis_date = this_analysis_date, + seed = this_seed )@AnalysisMetadata$seed, is_identical_to(this_seed) ) @@ -126,23 +146,32 @@ test_that("n2k_inla() converts numeric seed, when possible", { this_seed <- 12345 expect_that( n2k_inla( - data = dataset, result_datasource_id = this_result_datasource_id, - scheme_id = this_scheme_id, species_group_id = this_species_group_id, - location_group_id = this_location_group_id, model_type = this_model_type, - formula = this_formula, first_imported_year = this_first_imported_year, + data = dataset, + result_datasource_id = this_result_datasource_id, + scheme_id = this_scheme_id, + species_group_id = this_species_group_id, + location_group_id = this_location_group_id, + model_type = this_model_type, + formula = this_formula, + first_imported_year = this_first_imported_year, last_imported_year = this_last_imported_year, - analysis_date = this_analysis_date, seed = this_seed + analysis_date = this_analysis_date, + seed = this_seed )@AnalysisMetadata$seed, is_identical_to(as.integer(this_seed)) ) expect_that( n2k_inla( - data = dataset, result_datasource_id = this_result_datasource_id, - scheme_id = this_scheme_id, species_group_id = this_species_group_id, - location_group_id = this_location_group_id, model_type = this_model_type, + data = dataset, + result_datasource_id = this_result_datasource_id, + scheme_id = this_scheme_id, + species_group_id = this_species_group_id, + location_group_id = this_location_group_id, + model_type = this_model_type, first_imported_year = this_first_imported_year, last_imported_year = this_last_imported_year, - analysis_date = this_analysis_date, seed = this_seed + 0.1 + analysis_date = this_analysis_date, + seed = this_seed + 0.1 ), throws_error("seed is not a count") ) @@ -154,12 +183,16 @@ test_that("n2k_inla() sets a random seed when not provided", { test_that("n2k_inla() sets the correct scheme_id", { expect_identical( n2k_inla( - data = dataset, result_datasource_id = this_result_datasource_id, + data = dataset, + result_datasource_id = this_result_datasource_id, species_group_id = this_species_group_id, - location_group_id = this_location_group_id, 
model_type = this_model_type, - formula = this_formula, first_imported_year = this_first_imported_year, + location_group_id = this_location_group_id, + model_type = this_model_type, + formula = this_formula, + first_imported_year = this_first_imported_year, last_imported_year = this_last_imported_year, - analysis_date = this_analysis_date, scheme_id = this_scheme_id + analysis_date = this_analysis_date, + scheme_id = this_scheme_id )@AnalysisMetadata$scheme_id, this_scheme_id ) @@ -168,12 +201,15 @@ test_that("n2k_inla() sets the correct scheme_id", { test_that("n2k_inla() sets the correct species_group_id", { expect_identical( n2k_inla( - data = dataset, result_datasource_id = this_result_datasource_id, + data = dataset, + result_datasource_id = this_result_datasource_id, species_group_id = this_species_group_id, - location_group_id = this_location_group_id, model_type = this_model_type, + location_group_id = this_location_group_id, + model_type = this_model_type, first_imported_year = this_first_imported_year, last_imported_year = this_last_imported_year, - analysis_date = this_analysis_date, formula = this_formula, + analysis_date = this_analysis_date, + formula = this_formula, scheme_id = this_scheme_id )@AnalysisMetadata$species_group_id, this_species_group_id @@ -183,12 +219,16 @@ test_that("n2k_inla() sets the correct species_group_id", { test_that("n2k_inla() sets the correct location_group_id", { expect_identical( n2k_inla( - data = dataset, result_datasource_id = this_result_datasource_id, + data = dataset, + result_datasource_id = this_result_datasource_id, species_group_id = this_species_group_id, - location_group_id = this_location_group_id, model_type = this_model_type, - formula = this_formula, first_imported_year = this_first_imported_year, + location_group_id = this_location_group_id, + model_type = this_model_type, + formula = this_formula, + first_imported_year = this_first_imported_year, last_imported_year = this_last_imported_year, - analysis_date = this_analysis_date, scheme_id = this_scheme_id + analysis_date = this_analysis_date, + scheme_id = this_scheme_id )@AnalysisMetadata$location_group_id, this_location_group_id ) @@ -197,12 +237,16 @@ test_that("n2k_inla() sets the correct location_group_id", { test_that("n2k_inla() sets the correct first_imported_year", { expect_identical( n2k_inla( - data = dataset, result_datasource_id = this_result_datasource_id, + data = dataset, + result_datasource_id = this_result_datasource_id, species_group_id = this_species_group_id, - location_group_id = this_location_group_id, model_type = this_model_type, - formula = this_formula, first_imported_year = this_first_imported_year, + location_group_id = this_location_group_id, + model_type = this_model_type, + formula = this_formula, + first_imported_year = this_first_imported_year, last_imported_year = this_last_imported_year, - analysis_date = this_analysis_date, scheme_id = this_scheme_id + analysis_date = this_analysis_date, + scheme_id = this_scheme_id )@AnalysisMetadata$first_imported_year, this_first_imported_year ) @@ -210,25 +254,31 @@ test_that("n2k_inla() sets the correct first_imported_year", { test_that("n2k_inla() checks that first_imported_year is from the past", { expect_error( n2k_inla( - data = dataset, result_datasource_id = this_result_datasource_id, + data = dataset, + result_datasource_id = this_result_datasource_id, species_group_id = this_species_group_id, - location_group_id = this_location_group_id, model_type = this_model_type, + location_group_id = 
this_location_group_id, + model_type = this_model_type, formula = this_formula, first_imported_year = as.integer(format(Sys.time(), "%Y")) + 1, last_imported_year = this_last_imported_year, - analysis_date = this_analysis_date, scheme_id = this_scheme_id + analysis_date = this_analysis_date, + scheme_id = this_scheme_id ), "first_imported_year cannot exceed last_imported_year" ) expect_is( n2k_inla( - data = dataset, result_datasource_id = this_result_datasource_id, + data = dataset, + result_datasource_id = this_result_datasource_id, species_group_id = this_species_group_id, - location_group_id = this_location_group_id, model_type = this_model_type, + location_group_id = this_location_group_id, + model_type = this_model_type, formula = this_formula, first_imported_year = as.integer(format(Sys.time(), "%Y")), last_imported_year = as.integer(format(Sys.time(), "%Y")), - analysis_date = this_analysis_date, scheme_id = this_scheme_id + analysis_date = this_analysis_date, + scheme_id = this_scheme_id ), "n2kInla" ) @@ -303,8 +353,10 @@ test_that("n2k_inla() checks that last_imported_year is from the past", { ) }) test_that( - "n2k_inla() checks that last_imported_year is not earlier than - first_imported_year", + paste( + "n2k_inla() checks that last_imported_year is not earlier than", + "first_imported_year" + ), { expect_that( n2k_inla( @@ -393,8 +445,10 @@ test_that("n2k_inla() converts numeric duration, when possible", { ) }) test_that( - "n2k_inla() checks that duration is not outside the FirstImportYear - - last_imported_year ranges", + paste( + "n2k_inla() checks that duration is not outside the FirstImportYear -", + "last_imported_year ranges" + ), { expect_that( n2k_inla( @@ -562,7 +616,9 @@ test_that("n2k_inla() checks if analysis date is from the past", { ) }) test_that( - "n2k_inla() checks if all variable in formula are available in the data", + paste( + "n2k_inla() checks if all variable in formula are available in the data" + ), { expect_that( n2k_inla( @@ -628,7 +684,9 @@ test_that( ) object_model <- n2k_inla( - data = object, model_fit = model_object, status = "converged" + data = object, + model_fit = model_object, + status = "converged" ) test_that("n2k_inla() keeps the objects", { expect_that( @@ -655,7 +713,10 @@ test_that("n2k_inla() keeps the objects", { ) expect_that( n2k_inla( - data = object, model_fit = model_object, status = "converged", seed = 1 + data = object, + model_fit = model_object, + status = "converged", + seed = 1 )@AnalysisMetadata$seed, is_identical_to(object@AnalysisMetadata$seed) ) @@ -797,21 +858,29 @@ test_that("n2k_inla() stores the new status", { ) expect_that( n2k_inla( - data = object, model_fit = model_object, status = "junk" + data = object, + model_fit = model_object, + status = "junk" ), throws_error("status must be one of the following") ) }) model_other <- INLA::inla( - Count ~ A * (B + C) + C:D + - f(E, model = "rw1", replicate = as.integer(A)) + - f(G, model = "iid"), - data = object@Data, family = "nbinomial" + Count ~ + A * + (B + C) + + C:D + + f(E, model = "rw1", replicate = as.integer(A)) + + f(G, model = "iid"), + data = object@Data, + family = "nbinomial" ) test_that("n2k_inla() checks if the family matches", { expect_that( n2k_inla( - data = object, model_fit = model_other, status = "converged" + data = object, + model_fit = model_other, + status = "converged" ), throws_error("Model of the wrong family") ) diff --git a/tests/testthat/test_aba_n2k_inla_accessor.R b/tests/testthat/test_aba_n2k_inla_accessor.R index 
bd8988af..e23d75bb 100644 --- a/tests/testthat/test_aba_n2k_inla_accessor.R +++ b/tests/testthat/test_aba_n2k_inla_accessor.R @@ -1,16 +1,22 @@ test_that("get_model() handles n2kInla objects", { dataset <- test_data() object <- n2k_inla( - result_datasource_id = sha1(letters), scheme_id = sha1(letters), - species_group_id = sha1(letters), location_group_id = sha1(letters), - model_type = "inla poisson: A", formula = "Count ~ A", - first_imported_year = 1990, last_imported_year = 2015, + result_datasource_id = sha1(letters), + scheme_id = sha1(letters), + species_group_id = sha1(letters), + location_group_id = sha1(letters), + model_type = "inla poisson: A", + formula = "Count ~ A", + first_imported_year = 1990, + last_imported_year = 2015, analysis_date = as.POSIXct("2000-01-01 12:13:14", tz = "UTC"), data = dataset ) model_object <- INLA::inla(Count ~ A, data = object@Data, family = "poisson") object_model <- n2k_inla( - data = object, model_fit = model_object, status = "converged" + data = object, + model_fit = model_object, + status = "converged" ) expect_that(get_model(object), is_identical_to(object@Model)) expect_that(get_model(object_model), is_identical_to(object_model@Model)) diff --git a/tests/testthat/test_aba_n2k_inla_validation.R b/tests/testthat/test_aba_n2k_inla_validation.R index e0ef308a..1ba81ea9 100644 --- a/tests/testthat/test_aba_n2k_inla_validation.R +++ b/tests/testthat/test_aba_n2k_inla_validation.R @@ -148,7 +148,6 @@ describe("illegal changes in the status fingerprint", { ) }) - it("detects changes in Model", { model_object <- INLA::inla( Count ~ B, @@ -156,7 +155,9 @@ describe("illegal changes in the status fingerprint", { family = "poisson" ) object_model <- n2k_inla( - data = object, model_fit = model_object, status = "converged" + data = object, + model_fit = model_object, + status = "converged" ) change_model <- INLA::inla( Count ~ C, diff --git a/tests/testthat/test_abb_n2k_spde.R b/tests/testthat/test_abb_n2k_spde.R index a92af460..e6db893a 100644 --- a/tests/testthat/test_abb_n2k_spde.R +++ b/tests/testthat/test_abb_n2k_spde.R @@ -17,20 +17,26 @@ this_lc <- dataset %>% select("A", "B", "C", "D") %>% filter(.data$C == max(.data$C), .data$D == max(.data$D)) %>% distinct() %>% - model.matrix(object = ~A * (B + C) + C:D) + model.matrix(object = ~ A * (B + C) + C:D) test_that("n2k_spde() creates the object", { expect_s4_class( spde_model <- spde( - dataset[, c("C", "D")], range = c(0.5, 0.05), sigma = c(0.5, 0.05) + dataset[, c("C", "D")], + range = c(0.5, 0.05), + sigma = c(0.5, 0.05) ), "Spde" ) expect_s4_class( object <- n2k_spde( result_datasource_id = this_result_datasource_id, - scheme_id = this_scheme_id, species_group_id = this_species_group_id, - location_group_id = this_location_group_id, model_type = this_model_type, - formula = this_formula, spde = spde_model, data = dataset, + scheme_id = this_scheme_id, + species_group_id = this_species_group_id, + location_group_id = this_location_group_id, + model_type = this_model_type, + formula = this_formula, + spde = spde_model, + data = dataset, first_imported_year = this_first_imported_year, last_imported_year = this_last_imported_year, analysis_date = this_analysis_date diff --git a/tests/testthat/test_aca_n2k_aggregate.R b/tests/testthat/test_aca_n2k_aggregate.R index 96b12edb..0b8e7d37 100644 --- a/tests/testthat/test_aca_n2k_aggregate.R +++ b/tests/testthat/test_aca_n2k_aggregate.R @@ -16,22 +16,29 @@ test_that("prepare a n2kAggregate object", { dataset <- test_data() object <- n2k_inla( 
result_datasource_id = this_result_datasource_id, - scheme_id = this_scheme_id, species_group_id = this_species_group_id, - location_group_id = this_location_group_id, model_type = this_model_type, - formula = this_formula, first_imported_year = this_first_imported_year, + scheme_id = this_scheme_id, + species_group_id = this_species_group_id, + location_group_id = this_location_group_id, + model_type = this_model_type, + formula = this_formula, + first_imported_year = this_first_imported_year, last_imported_year = this_last_imported_year, - analysis_date = this_analysis_date, data = dataset + analysis_date = this_analysis_date, + data = dataset ) object <- fit_model(object) expect_is( child <- n2k_aggregate( result_datasource_id = this_result_datasource_id, - scheme_id = this_scheme_id, species_group_id = this_species_group_id, + scheme_id = this_scheme_id, + species_group_id = this_species_group_id, location_group_id = this_location_group_id, - model_type = this_model_type, formula = this_formula, + model_type = this_model_type, + formula = this_formula, first_imported_year = this_first_imported_year, last_imported_year = this_last_imported_year, - analysis_date = this_analysis_date, parent = get_file_fingerprint(object), + analysis_date = this_analysis_date, + parent = get_file_fingerprint(object), fun = sum ), "n2kAggregate" @@ -41,7 +48,8 @@ test_that("prepare a n2kAggregate object", { project <- "n2kaggregate" expect_is(store_model(object, base = base, project = project), "character") expect_is( - parent <- get_parents(child, base = base, project = project), "list" + parent <- get_parents(child, base = base, project = project), + "list" ) expect_setequal(class(parent[[1]]), "n2kInla") diff --git a/tests/testthat/test_aca_n2k_anomaly.R b/tests/testthat/test_aca_n2k_anomaly.R index f2656e37..62d3d62d 100644 --- a/tests/testthat/test_aca_n2k_anomaly.R +++ b/tests/testthat/test_aca_n2k_anomaly.R @@ -10,12 +10,20 @@ test_that("n2kAnomaly", { ) %>% as.POSIXct(origin = "1970-01-01 00:00.00 UTC", tz = "Europe/Brussels") metadata <- data.frame( - scheme_id = schemeid, species_group_id = speciesid, - location_group_id = locationgroupid, model_type = "Unit test", - formula = "y ~ x", first_imported_year = 2000L, last_imported_year = 2010L, - duration = 11L, last_analysed_year = 2010L, analysis_date = analysisdate, - seed = 12345L, AnalysisVersion = version@AnalysisVersion$fingerprint, - status = "converged", stringsAsFactors = FALSE + scheme_id = schemeid, + species_group_id = speciesid, + location_group_id = locationgroupid, + model_type = "Unit test", + formula = "y ~ x", + first_imported_year = 2000L, + last_imported_year = 2010L, + duration = 11L, + last_analysed_year = 2010L, + analysis_date = analysisdate, + seed = 12345L, + AnalysisVersion = version@AnalysisVersion$fingerprint, + status = "converged", + stringsAsFactors = FALSE ) metadata$file_fingerprint <- metadata %>% select(-"status") %>% @@ -27,21 +35,26 @@ test_that("n2kAnomaly", { datafieldid <- sha1(letters) parameter <- data.frame( - description = c("Unit test", "Unit test letters"), parent = NA, + description = c("Unit test", "Unit test letters"), + parent = NA, stringsAsFactors = FALSE ) %>% mutate( fingerprint = map2_chr( - .data$description, .data$parent, ~sha1(c(description = .x, Parent = .y)) + .data$description, + .data$parent, + ~ sha1(c(description = .x, Parent = .y)) ) ) parameter <- expand.grid( - description = seq_len(10), parent = parameter$description, + description = seq_len(10), + parent = parameter$description, 
stringsAsFactors = FALSE ) %>% mutate( description = ifelse( - grepl("letters", .data$parent), LETTERS[.data$description], + grepl("letters", .data$parent), + LETTERS[.data$description], .data$description ) %>% sprintf(fmt = "Unit test %s") @@ -50,53 +63,66 @@ test_that("n2kAnomaly", { select("description", parent = "fingerprint") %>% mutate( fingerprint = map2_chr( - .data$description, .data$parent, ~sha1(c(description = .x, parent = .y)) + .data$description, + .data$parent, + ~ sha1(c(description = .x, parent = .y)) ) ) %>% bind_rows(parameter) %>% as.data.frame() parameterestimate <- expand.grid( - analysis = metadata$file_fingerprint, parameter = parameter$fingerprint, + analysis = metadata$file_fingerprint, + parameter = parameter$fingerprint, stringsAsFactors = FALSE ) %>% mutate( - estimate = rnorm(n()), SE = runif(n()), + estimate = rnorm(n()), + SE = runif(n()), lower_confidence_limit = .data$estimate - .data$SE, upper_confidence_limit = .data$estimate + .data$SE ) %>% select(-"SE") anomalytype <- data.frame( - description = c("Unit test", "Unit test 2"), stringsAsFactors = FALSE + description = c("Unit test", "Unit test 2"), + stringsAsFactors = FALSE ) %>% mutate( - fingerprint = map_chr(.data$description, ~sha1(c(description = .x))) + fingerprint = map_chr(.data$description, ~ sha1(c(description = .x))) ) %>% as.data.frame() anomaly <- expand.grid( anomaly_type = anomalytype$fingerprint, analysis = metadata$file_fingerprint, parameter = sample(parameter$fingerprint, min(5, nrow(parameter))), - datafield_id = datafieldid, observation = "1", stringsAsFactors = FALSE + datafield_id = datafieldid, + observation = "1", + stringsAsFactors = FALSE ) %>% mutate(estimate = seq_along(.data$analysis)) expect_is( new( - "n2kAnomaly", Parameter = parameter, ParameterEstimate = parameterestimate + "n2kAnomaly", + Parameter = parameter, + ParameterEstimate = parameterestimate ), "n2kAnomaly" ) expect_is( new( - "n2kAnomaly", Parameter = parameter, - ParameterEstimate = parameterestimate, AnomalyType = anomalytype + "n2kAnomaly", + Parameter = parameter, + ParameterEstimate = parameterestimate, + AnomalyType = anomalytype ), "n2kAnomaly" ) expect_is( new( - "n2kAnomaly", Parameter = parameter, - ParameterEstimate = parameterestimate, AnomalyType = anomalytype, + "n2kAnomaly", + Parameter = parameter, + ParameterEstimate = parameterestimate, + AnomalyType = anomalytype, Anomaly = anomaly ), "n2kAnomaly" @@ -104,25 +130,31 @@ test_that("n2kAnomaly", { # check for duplicates expect_error( new( - "n2kAnomaly", Parameter = parameter, - ParameterEstimate = parameterestimate, AnomalyType = anomalytype, + "n2kAnomaly", + Parameter = parameter, + ParameterEstimate = parameterestimate, + AnomalyType = anomalytype, Anomaly = cbind(anomaly, anomaly) ), "duplicated column names in Anomaly" ) expect_error( new( - "n2kAnomaly", Parameter = parameter, - ParameterEstimate = parameterestimate, AnomalyType = anomalytype, + "n2kAnomaly", + Parameter = parameter, + ParameterEstimate = parameterestimate, + AnomalyType = anomalytype, Anomaly = rbind(anomaly, anomaly) ), "Duplicated anomalies" ) expect_error( new( - "n2kAnomaly", Parameter = parameter, + "n2kAnomaly", + Parameter = parameter, ParameterEstimate = parameterestimate, - AnomalyType = rbind(anomalytype, anomalytype), Anomaly = anomaly + AnomalyType = rbind(anomalytype, anomalytype), + Anomaly = anomaly ), "Duplicated anomalytypes" ) @@ -130,7 +162,9 @@ test_that("n2kAnomaly", { # check for matching rows expect_error( new( - "n2kAnomaly", Parameter = 
parameter, AnomalyType = anomalytype, + "n2kAnomaly", + Parameter = parameter, + AnomalyType = anomalytype, Anomaly = anomaly ), paste( diff --git a/tests/testthat/test_aca_n2k_model_accessor.R b/tests/testthat/test_aca_n2k_model_accessor.R index 004ecc1f..9469430c 100644 --- a/tests/testthat/test_aca_n2k_model_accessor.R +++ b/tests/testthat/test_aca_n2k_model_accessor.R @@ -17,7 +17,7 @@ this_lc <- dataset %>% select("A", "B", "C", "D") %>% filter(.data$C == max(.data$C), .data$D == max(.data$D)) %>% distinct() %>% - model.matrix(object = ~A * (B + C) + C:D) + model.matrix(object = ~ A * (B + C) + C:D) object <- n2k_inla( result_datasource_id = this_result_datasource_id, scheme_id = this_scheme_id, @@ -34,10 +34,12 @@ object_model <- fit_model(object) test_that("status() returns the status of n2kModels", { expect_that( - status(object), is_identical_to(object@AnalysisMetadata$status) + status(object), + is_identical_to(object@AnalysisMetadata$status) ) expect_that( - status(object_model), is_identical_to(object_model@AnalysisMetadata$status) + status(object_model), + is_identical_to(object_model@AnalysisMetadata$status) ) }) test_that("status() updates the status of n2kModels", { @@ -60,7 +62,8 @@ test_that("get_seed() returns the seed slot", { test_that("get_scheme_id() returns the scheme_id slot", { expect_that( - get_scheme_id(object), is_identical_to(object@AnalysisMetadata$scheme_id) + get_scheme_id(object), + is_identical_to(object@AnalysisMetadata$scheme_id) ) }) diff --git a/tests/testthat/test_baa_get_file_fingerprint.R b/tests/testthat/test_baa_get_file_fingerprint.R index 44cd3c29..6e24d3fb 100644 --- a/tests/testthat/test_baa_get_file_fingerprint.R +++ b/tests/testthat/test_baa_get_file_fingerprint.R @@ -1,7 +1,9 @@ test_that("gets the correct fingerprint", { x <- n2k_manifest( data.frame( - fingerprint = "1", parent = NA_character_, stringsAsFactors = FALSE + fingerprint = "1", + parent = NA_character_, + stringsAsFactors = FALSE ) ) expect_identical(x@Fingerprint, get_file_fingerprint(x)) diff --git a/tests/testthat/test_baa_store_manifest.R b/tests/testthat/test_baa_store_manifest.R index db6f4f00..6158a8ba 100644 --- a/tests/testthat/test_baa_store_manifest.R +++ b/tests/testthat/test_baa_store_manifest.R @@ -4,7 +4,9 @@ test_that("store_manifest stores the manifest on a local file system", { dir.create(temp_dir) object <- n2k_manifest( data.frame( - fingerprint = "1", parent = NA_character_, stringsAsFactors = FALSE + fingerprint = "1", + parent = NA_character_, + stringsAsFactors = FALSE ) ) expect_is( @@ -31,7 +33,9 @@ test_that("store_manifest stores the manifest on an S3 bucket", { bucket <- get_bucket(Sys.getenv("N2KBUCKET"), prefix = project, max = 1) object <- n2k_manifest( data.frame( - fingerprint = "1", parent = NA_character_, stringsAsFactors = FALSE + fingerprint = "1", + parent = NA_character_, + stringsAsFactors = FALSE ) ) expect_type( diff --git a/tests/testthat/test_bba_read_manifest.R b/tests/testthat/test_bba_read_manifest.R index 6b2c9259..bc29f8b9 100644 --- a/tests/testthat/test_bba_read_manifest.R +++ b/tests/testthat/test_bba_read_manifest.R @@ -3,30 +3,38 @@ test_that("read_manifest reads the manifest on a local file system", { dir.create(temp_dir) object <- n2k_manifest( data.frame( - fingerprint = "1", parent = NA_character_, stringsAsFactors = FALSE + fingerprint = "1", + parent = NA_character_, + stringsAsFactors = FALSE ) ) object2 <- n2k_manifest( data.frame( - fingerprint = "4", parent = NA_character_, stringsAsFactors = FALSE + 
fingerprint = "4", parent = NA_character_, stringsAsFactors = FALSE
+      fingerprint = "4",
+      parent = NA_character_,
+      stringsAsFactors = FALSE
     )
   )
   expect_error(
-    read_manifest(temp_dir, "read_manifest"), "No manifest files in"
+    read_manifest(temp_dir, "read_manifest"),
+    "No manifest files in"
   )
   file.path(temp_dir, "read_manifest", "manifest") |>
     normalizePath(mustWork = FALSE) |>
     dir.create(recursive = TRUE)
   expect_error(
-    read_manifest(temp_dir, "read_manifest"), "No manifest files in"
+    read_manifest(temp_dir, "read_manifest"),
+    "No manifest files in"
   )
   store_manifest(object, temp_dir, "read_manifest")
   store_manifest(object2, temp_dir, "read_manifest")
   expect_equal(
-    read_manifest(temp_dir, "read_manifest", object@Fingerprint), object
+    read_manifest(temp_dir, "read_manifest", object@Fingerprint),
+    object
   )
   expect_equal(
-    read_manifest(temp_dir, "read_manifest", object2@Fingerprint), object2
+    read_manifest(temp_dir, "read_manifest", object2@Fingerprint),
+    object2
   )
   Sys.sleep(1)
   expect_equal(read_manifest(temp_dir, "read_manifest"), object2)
@@ -45,12 +53,16 @@ test_that("read_manifest reads the manifest on an S3 bucket", {
   bucket <- get_bucket(Sys.getenv("N2KBUCKET"), prefix = project, max = 1)
   object <- n2k_manifest(
     data.frame(
-      fingerprint = "1", parent = NA_character_, stringsAsFactors = FALSE
+      fingerprint = "1",
+      parent = NA_character_,
+      stringsAsFactors = FALSE
     )
   )
   object2 <- n2k_manifest(
     data.frame(
-      fingerprint = "4", parent = NA_character_, stringsAsFactors = FALSE
+      fingerprint = "4",
+      parent = NA_character_,
+      stringsAsFactors = FALSE
     )
   )
   store_manifest(object, bucket, project)
diff --git a/tests/testthat/test_bba_store_model.R b/tests/testthat/test_bba_store_model.R
index fa51d114..f47c8793 100644
--- a/tests/testthat/test_bba_store_model.R
+++ b/tests/testthat/test_bba_store_model.R
@@ -19,11 +19,15 @@ test_that("store_model stores the model on a local file system", {
   dataset <- test_data()
   object <- n2k_inla(
     result_datasource_id = this_result_datasource_id,
-    scheme_id = this_scheme_id, species_group_id = this_species_group_id,
-    location_group_id = this_location_group_id, model_type = this_model_type,
-    formula = this_formula, first_imported_year = this_first_imported_year,
+    scheme_id = this_scheme_id,
+    species_group_id = this_species_group_id,
+    location_group_id = this_location_group_id,
+    model_type = this_model_type,
+    formula = this_formula,
+    first_imported_year = this_first_imported_year,
     last_imported_year = this_last_imported_year,
-    analysis_date = this_analysis_date, data = dataset
+    analysis_date = this_analysis_date,
+    data = dataset
   )
   expect_is(filename <- store_model(object, base, project), "character")
   file_info <- file.info(filename)
@@ -80,11 +84,15 @@ test_that("store_model stores the model on an S3 bucket", {
   dataset <- test_data()
   object <- n2k_inla(
     result_datasource_id = this_result_datasource_id,
-    scheme_id = this_scheme_id, species_group_id = this_species_group_id,
-    location_group_id = this_location_group_id, model_type = this_model_type,
-    formula = this_formula, first_imported_year = this_first_imported_year,
+    scheme_id = this_scheme_id,
+    species_group_id = this_species_group_id,
+    location_group_id = this_location_group_id,
+    model_type = this_model_type,
+    formula = this_formula,
+    first_imported_year = this_first_imported_year,
     last_imported_year = this_last_imported_year,
-    analysis_date = this_analysis_date, data = dataset
+    analysis_date = this_analysis_date,
+    data = dataset
  )
   expect_is(
     filename <- store_model(x = object, base = bucket, project = project),
diff --git 
a/tests/testthat/test_bbb_store_manifest_yaml.R b/tests/testthat/test_bbb_store_manifest_yaml.R index ab807b5e..248544e1 100644 --- a/tests/testthat/test_bbb_store_manifest_yaml.R +++ b/tests/testthat/test_bbb_store_manifest_yaml.R @@ -3,7 +3,9 @@ test_that("store_manifest_yaml stores the manifest on an S3 bucket", { skip_if(Sys.getenv("AWS_SECRET_ACCESS_KEY") == "", message = "No AWS access") object <- n2k_manifest( data.frame( - fingerprint = "1", parent = NA_character_, stringsAsFactors = FALSE + fingerprint = "1", + parent = NA_character_, + stringsAsFactors = FALSE ) ) project <- "unittest_store_manifest_yaml" @@ -12,7 +14,10 @@ test_that("store_manifest_yaml stores the manifest on an S3 bucket", { dependencies <- c("inbo/n2khelper@v0.4.1", "inbo/n2kanalysis@docker") expect_is( stored <- store_manifest_yaml( - x = object, base = bucket, project = project, docker = docker, + x = object, + base = bucket, + project = project, + docker = docker, dependencies = dependencies ), "character" @@ -29,7 +34,10 @@ test_that("store_manifest_yaml stores the manifest on an S3 bucket", { expect_is( stored2 <- store_manifest_yaml( - x = object, base = bucket, project = project, docker = docker, + x = object, + base = bucket, + project = project, + docker = docker, dependencies = dependencies ), "character" @@ -52,7 +60,9 @@ test_that("store_manifest_yaml stores the manifest on a file system", { dir.create(base) object <- n2k_manifest( data.frame( - fingerprint = "1", parent = NA_character_, stringsAsFactors = FALSE + fingerprint = "1", + parent = NA_character_, + stringsAsFactors = FALSE ) ) project <- "unittest_store_manifest_yaml" @@ -60,7 +70,10 @@ test_that("store_manifest_yaml stores the manifest on a file system", { dependencies <- c("inbo/n2khelper@v0.4.1", "inbo/n2kanalysis@docker") expect_is( stored <- store_manifest_yaml( - x = object, base = base, project = project, docker = docker, + x = object, + base = base, + project = project, + docker = docker, dependencies = dependencies ), "character" @@ -70,7 +83,10 @@ test_that("store_manifest_yaml stores the manifest on a file system", { expect_is( stored2 <- store_manifest_yaml( - x = object, base = base, project = project, docker = docker, + x = object, + base = base, + project = project, + docker = docker, dependencies = dependencies ), "character" diff --git a/tests/testthat/test_caa_fit_model.R b/tests/testthat/test_caa_fit_model.R index e216454a..9dfada1b 100644 --- a/tests/testthat/test_caa_fit_model.R +++ b/tests/testthat/test_caa_fit_model.R @@ -24,52 +24,80 @@ test_that("fit_model() on INLA based objects", { rownames(lin_comb_list2[[1]]) <- seq_along(unique(dataset$E)) object <- n2k_inla( result_datasource_id = this_result_datasource_id, - scheme_id = this_scheme_id, species_group_id = this_species_group_id, - location_group_id = this_location_group_id, model_type = this_model_type, - formula = this_formula, first_imported_year = this_first_imported_year, + scheme_id = this_scheme_id, + species_group_id = this_species_group_id, + location_group_id = this_location_group_id, + model_type = this_model_type, + formula = this_formula, + first_imported_year = this_first_imported_year, last_imported_year = this_last_imported_year, - analysis_date = this_analysis_date, data = dataset + analysis_date = this_analysis_date, + data = dataset ) object_lc <- n2k_inla( result_datasource_id = this_result_datasource_id, - scheme_id = this_scheme_id, species_group_id = this_species_group_id, - location_group_id = this_location_group_id, model_type = 
this_model_type, - formula = this_formula, first_imported_year = this_first_imported_year, + scheme_id = this_scheme_id, + species_group_id = this_species_group_id, + location_group_id = this_location_group_id, + model_type = this_model_type, + formula = this_formula, + first_imported_year = this_first_imported_year, last_imported_year = this_last_imported_year, - analysis_date = this_analysis_date, data = dataset, lin_comb = lin_comb + analysis_date = this_analysis_date, + data = dataset, + lin_comb = lin_comb ) object_lc_list <- n2k_inla( result_datasource_id = this_result_datasource_id, - scheme_id = this_scheme_id, species_group_id = this_species_group_id, - location_group_id = this_location_group_id, model_type = this_model_type, - formula = this_formula, first_imported_year = this_first_imported_year, + scheme_id = this_scheme_id, + species_group_id = this_species_group_id, + location_group_id = this_location_group_id, + model_type = this_model_type, + formula = this_formula, + first_imported_year = this_first_imported_year, last_imported_year = this_last_imported_year, - analysis_date = this_analysis_date, data = dataset, lin_comb = lin_comb_list + analysis_date = this_analysis_date, + data = dataset, + lin_comb = lin_comb_list ) object_lc_list2 <- n2k_inla( result_datasource_id = this_result_datasource_id, - scheme_id = this_scheme_id, species_group_id = this_species_group_id, - location_group_id = this_location_group_id, model_type = this_model_type, - formula = this_formula, first_imported_year = this_first_imported_year, + scheme_id = this_scheme_id, + species_group_id = this_species_group_id, + location_group_id = this_location_group_id, + model_type = this_model_type, + formula = this_formula, + first_imported_year = this_first_imported_year, last_imported_year = this_last_imported_year, - analysis_date = this_analysis_date, data = dataset, + analysis_date = this_analysis_date, + data = dataset, lin_comb = lin_comb_list2 ) object_badlc <- n2k_inla( result_datasource_id = this_result_datasource_id, - scheme_id = this_scheme_id, species_group_id = this_species_group_id, - location_group_id = this_location_group_id, model_type = this_model_type, - formula = this_formula, first_imported_year = this_first_imported_year, + scheme_id = this_scheme_id, + species_group_id = this_species_group_id, + location_group_id = this_location_group_id, + model_type = this_model_type, + formula = this_formula, + first_imported_year = this_first_imported_year, last_imported_year = this_last_imported_year, - analysis_date = this_analysis_date, data = dataset, lin_comb = bad_lin_comb + analysis_date = this_analysis_date, + data = dataset, + lin_comb = bad_lin_comb ) object_imp <- n2k_inla( result_datasource_id = this_result_datasource_id, - scheme_id = this_scheme_id, species_group_id = this_species_group_id, - location_group_id = this_location_group_id, model_type = this_model_type, - formula = this_formula, first_imported_year = this_first_imported_year, + scheme_id = this_scheme_id, + species_group_id = this_species_group_id, + location_group_id = this_location_group_id, + model_type = this_model_type, + formula = this_formula, + first_imported_year = this_first_imported_year, last_imported_year = this_last_imported_year, - analysis_date = this_analysis_date, imputation.size = 10, data = dataset + analysis_date = this_analysis_date, + imputation.size = 10, + data = dataset ) object_fit <- fit_model(object) object_lc_fit <- fit_model(object_lc) @@ -78,11 +106,21 @@ test_that("fit_model() on INLA 
based objects", { object_badlc_fit <- fit_model(object_badlc) object_imp_fit <- fit_model(object_imp) cat( - "\nobject_file <- \"", get_file_fingerprint(object), "\"\n", - "object_lc_file <- \"", get_file_fingerprint(object_lc), "\"\n", - "object_lc_list_file <- \"", get_file_fingerprint(object_lc_list), "\"\n", - "object_lc_list2_file <- \"", get_file_fingerprint(object_lc_list2), "\"\n", - "object_badlc_file <- \"", get_file_fingerprint(object_badlc), "\"\n", + "\nobject_file <- \"", + get_file_fingerprint(object), + "\"\n", + "object_lc_file <- \"", + get_file_fingerprint(object_lc), + "\"\n", + "object_lc_list_file <- \"", + get_file_fingerprint(object_lc_list), + "\"\n", + "object_lc_list2_file <- \"", + get_file_fingerprint(object_lc_list2), + "\"\n", + "object_badlc_file <- \"", + get_file_fingerprint(object_badlc), + "\"\n", sep = "" ) # 64-bit linux @@ -97,15 +135,18 @@ test_that("fit_model() on INLA based objects", { expect_identical(object_lc_file, get_file_fingerprint(object_lc)) expect_identical(object_lc_list_file, get_file_fingerprint(object_lc_list)) expect_identical( - object_lc_list2_file, get_file_fingerprint(object_lc_list2) + object_lc_list2_file, + get_file_fingerprint(object_lc_list2) ) expect_identical(object_badlc_file, get_file_fingerprint(object_badlc)) # doesn't alter the file fingerprint when fitting a model expect_identical( - get_file_fingerprint(object), get_file_fingerprint(object_fit) + get_file_fingerprint(object), + get_file_fingerprint(object_fit) ) expect_identical( - get_file_fingerprint(object_lc), get_file_fingerprint(object_lc_fit) + get_file_fingerprint(object_lc), + get_file_fingerprint(object_lc_fit) ) # returns valid objects @@ -118,7 +159,10 @@ test_that("fit_model() on INLA based objects", { expect_identical(status(filename)$status, "new") suppressWarnings(suppressMessages(fit_model(filename))) filename <- list.files( - temp_dir, pattern = basename(filename), recursive = TRUE, full.names = TRUE + temp_dir, + pattern = basename(filename), + recursive = TRUE, + full.names = TRUE ) expect_identical(status(filename)$status, "converged") analysis <- object_lc @@ -126,7 +170,10 @@ test_that("fit_model() on INLA based objects", { expect_identical(status(filename)$status, "new") suppressWarnings(suppressMessages(fit_model(filename))) filename <- list.files( - temp_dir, pattern = basename(filename), recursive = TRUE, full.names = TRUE + temp_dir, + pattern = basename(filename), + recursive = TRUE, + full.names = TRUE ) expect_identical(status(filename)$status, "converged") @@ -141,11 +188,15 @@ test_that("fit_model() on INLA based objects", { skip_on_os("windows") object_long <- n2k_inla( result_datasource_id = this_result_datasource_id, - scheme_id = this_scheme_id, species_group_id = this_species_group_id, - location_group_id = this_location_group_id, model_type = this_model_type, - formula = this_formula, first_imported_year = this_first_imported_year, + scheme_id = this_scheme_id, + species_group_id = this_species_group_id, + location_group_id = this_location_group_id, + model_type = this_model_type, + formula = this_formula, + first_imported_year = this_first_imported_year, last_imported_year = this_last_imported_year, - analysis_date = this_analysis_date, lin_comb = lin_comb, + analysis_date = this_analysis_date, + lin_comb = lin_comb, data = list(dataset) |> rep(10) |> bind_rows() |> @@ -172,29 +223,39 @@ test_that("fit_model() works on n2kInlaComparison", { analysis <- n2k_inla( result_datasource_id = this_result_datasource_id, - scheme_id = 
this_scheme_id, species_group_id = this_species_group_id, - location_group_id = this_location_group_id, model_type = this_model_type, - formula = "Count ~ A", first_imported_year = this_first_imported_year, + scheme_id = this_scheme_id, + species_group_id = this_species_group_id, + location_group_id = this_location_group_id, + model_type = this_model_type, + formula = "Count ~ A", + first_imported_year = this_first_imported_year, last_imported_year = this_last_imported_year, - analysis_date = this_analysis_date, data = dataset + analysis_date = this_analysis_date, + data = dataset ) p1 <- get_file_fingerprint(analysis) filename1 <- store_model(analysis, base = temp_dir, project = "fit_model") analysis <- n2k_inla( result_datasource_id = this_result_datasource_id, - scheme_id = this_scheme_id, species_group_id = this_species_group_id, - location_group_id = this_location_group_id, model_type = this_model_type, - formula = "Count ~ A * B", first_imported_year = this_first_imported_year, + scheme_id = this_scheme_id, + species_group_id = this_species_group_id, + location_group_id = this_location_group_id, + model_type = this_model_type, + formula = "Count ~ A * B", + first_imported_year = this_first_imported_year, last_imported_year = this_last_imported_year, - analysis_date = this_analysis_date, data = dataset + analysis_date = this_analysis_date, + data = dataset ) p2 <- get_file_fingerprint(analysis) filename2 <- store_model(analysis, base = temp_dir, project = "fit_model") analysis <- n2k_inla_comparison( result_datasource_id = this_result_datasource_id, - scheme_id = this_scheme_id, species_group_id = this_species_group_id, - location_group_id = this_location_group_id, formula = "~B", + scheme_id = this_scheme_id, + species_group_id = this_species_group_id, + location_group_id = this_location_group_id, + formula = "~B", model_type = "inla comparison: A*B", first_imported_year = this_first_imported_year, last_imported_year = this_last_imported_year, @@ -208,7 +269,9 @@ test_that("fit_model() works on n2kInlaComparison", { ) filename3 <- store_model(analysis, base = temp_dir, project = "fit_model") fit_model( - get_file_fingerprint(analysis), base = temp_dir, project = "fit_model", + get_file_fingerprint(analysis), + base = temp_dir, + project = "fit_model", verbose = FALSE ) @@ -239,29 +302,39 @@ test_that("fit_model() works on n2kInlaComposite", { analysis <- n2k_inla( result_datasource_id = this_result_datasource_id, - scheme_id = this_scheme_id, species_group_id = this_species_group_id, - location_group_id = this_location_group_id, model_type = this_model_type, - formula = "Count ~ A", first_imported_year = this_first_imported_year, + scheme_id = this_scheme_id, + species_group_id = this_species_group_id, + location_group_id = this_location_group_id, + model_type = this_model_type, + formula = "Count ~ A", + first_imported_year = this_first_imported_year, last_imported_year = this_last_imported_year, - analysis_date = this_analysis_date, data = dataset + analysis_date = this_analysis_date, + data = dataset ) p1 <- get_file_fingerprint(analysis) filename1 <- store_model(analysis, base = temp_dir, project = "fit_model") analysis <- n2k_inla( result_datasource_id = this_result_datasource_id, - scheme_id = this_scheme_id, species_group_id = this_species_group_id, - location_group_id = this_location_group_id, model_type = this_model_type, - formula = "Count ~ A + B", first_imported_year = this_first_imported_year, + scheme_id = this_scheme_id, + species_group_id = this_species_group_id, + 
location_group_id = this_location_group_id, + model_type = this_model_type, + formula = "Count ~ A + B", + first_imported_year = this_first_imported_year, last_imported_year = this_last_imported_year, - analysis_date = this_analysis_date, data = dataset + analysis_date = this_analysis_date, + data = dataset ) p2 <- get_file_fingerprint(analysis) filename2 <- store_model(analysis, base = temp_dir, project = "fit_model") analysis <- n2k_composite( result_datasource_id = this_result_datasource_id, - scheme_id = this_scheme_id, species_group_id = this_species_group_id, - location_group_id = this_location_group_id, formula = "~B", + scheme_id = this_scheme_id, + species_group_id = this_species_group_id, + location_group_id = this_location_group_id, + formula = "~B", model_type = "inla comparison: A*B", first_imported_year = this_first_imported_year, last_imported_year = this_last_imported_year, @@ -279,14 +352,16 @@ test_that("fit_model() works on n2kInlaComposite", { transmute( .data$value, estimate = .data$mean, - variance = .data$sd ^ 2 + variance = .data$sd^2 ) } ) filename3 <- store_model(analysis, base = temp_dir, project = "fit_model") fit_model( - get_file_fingerprint(analysis), base = temp_dir, project = "fit_model", + get_file_fingerprint(analysis), + base = temp_dir, + project = "fit_model", verbose = FALSE ) @@ -308,15 +383,24 @@ test_that("fit_model() works on n2kHurdleImputed", { this_date <- Sys.time() - 24 * 3600 dataset |> transmute( - .data$A, Count = ifelse(.data$Count > 0, .data$Count, NA), - .data$observation_id, .data$datafield_id + .data$A, + Count = ifelse(.data$Count > 0, .data$Count, NA), + .data$observation_id, + .data$datafield_id ) |> n2k_inla( - result_datasource_id = "a", scheme_id = "b", seed = 20230922, + result_datasource_id = "a", + scheme_id = "b", + seed = 20230922, model_type = "inla zeroinflatednbinomial0: A", - family = "zeroinflatednbinomial0", formula = "Count ~ 1", - species_group_id = "c", location_group_id = "d", first_imported_year = 1, - last_imported_year = 10, imputation_size = 9, analysis_date = this_date, + family = "zeroinflatednbinomial0", + formula = "Count ~ 1", + species_group_id = "c", + location_group_id = "d", + first_imported_year = 1, + last_imported_year = 10, + imputation_size = 9, + analysis_date = this_date, control = list( control.family = list( list(hyper = list(theta = list(initial = -11, fixed = TRUE))) @@ -325,15 +409,24 @@ test_that("fit_model() works on n2kHurdleImputed", { ) -> count dataset |> transmute( - .data$A, Presence = ifelse(.data$Count > 0, 1, 0), - .data$observation_id, .data$datafield_id + .data$A, + Presence = ifelse(.data$Count > 0, 1, 0), + .data$observation_id, + .data$datafield_id ) |> n2k_inla( - result_datasource_id = "a", scheme_id = "b", + result_datasource_id = "a", + scheme_id = "b", model_type = "inla binomial: A", - family = "binomial", formula = "Presence ~ 1", seed = 20230922, - species_group_id = "c", location_group_id = "d", first_imported_year = 1, - last_imported_year = 10, imputation_size = 9, analysis_date = this_date + family = "binomial", + formula = "Presence ~ 1", + seed = 20230922, + species_group_id = "c", + location_group_id = "d", + first_imported_year = 1, + last_imported_year = 10, + imputation_size = 9, + analysis_date = this_date ) -> presence hurdle <- n2k_hurdle_imputed(presence = presence, count = count) expect_s4_class(fit_model(hurdle, status = "error"), "n2kHurdleImputed") @@ -353,7 +446,9 @@ test_that("fit_model() works on n2kHurdleImputed", { suppressWarnings( 
expect_invisible( output <- fit_model( - basename(sha_presence), base = base, project = project + basename(sha_presence), + base = base, + project = project ) ) ) diff --git a/tests/testthat/test_cba_fit_model_manifest.R b/tests/testthat/test_cba_fit_model_manifest.R index fd3d2fd1..5fb93214 100644 --- a/tests/testthat/test_cba_fit_model_manifest.R +++ b/tests/testthat/test_cba_fit_model_manifest.R @@ -3,24 +3,39 @@ test_that("it handles a manifest", { dataset <- test_data() object <- n2k_inla( result_datasource_id = sha1(sample(letters)), - scheme_id = sha1(sample(letters)), species_group_id = sha1(sample(letters)), - location_group_id = sha1(sample(letters)), model_type = "inla poisson: A", - formula = "Count ~ A", first_imported_year = 1990, - last_imported_year = 2015, analysis_date = Sys.time(), data = dataset + scheme_id = sha1(sample(letters)), + species_group_id = sha1(sample(letters)), + location_group_id = sha1(sample(letters)), + model_type = "inla poisson: A", + formula = "Count ~ A", + first_imported_year = 1990, + last_imported_year = 2015, + analysis_date = Sys.time(), + data = dataset ) object2 <- n2k_inla( result_datasource_id = sha1(sample(letters)), - scheme_id = sha1(sample(letters)), species_group_id = sha1(sample(letters)), - location_group_id = sha1(sample(letters)), model_type = "inla poisson: B", - formula = "Count ~ B", first_imported_year = 1990, - last_imported_year = 2015, analysis_date = Sys.time(), data = dataset + scheme_id = sha1(sample(letters)), + species_group_id = sha1(sample(letters)), + location_group_id = sha1(sample(letters)), + model_type = "inla poisson: B", + formula = "Count ~ B", + first_imported_year = 1990, + last_imported_year = 2015, + analysis_date = Sys.time(), + data = dataset ) object3 <- n2k_inla( result_datasource_id = sha1(sample(letters)), - scheme_id = sha1(sample(letters)), species_group_id = sha1(sample(letters)), - location_group_id = sha1(sample(letters)), model_type = "inla poisson: C", - formula = "Count ~ C", first_imported_year = 1990, - last_imported_year = 2015, analysis_date = Sys.time(), data = dataset + scheme_id = sha1(sample(letters)), + species_group_id = sha1(sample(letters)), + location_group_id = sha1(sample(letters)), + model_type = "inla poisson: C", + formula = "Count ~ C", + first_imported_year = 1990, + last_imported_year = 2015, + analysis_date = Sys.time(), + data = dataset ) # works with local file @@ -31,24 +46,35 @@ test_that("it handles a manifest", { store_model(object3, base = base, project = project) manif <- data.frame( fingerprint = c( - get_file_fingerprint(object), get_file_fingerprint(object2), + get_file_fingerprint(object), + get_file_fingerprint(object2), get_file_fingerprint(object3) ), parent = c( - NA, get_file_fingerprint(object), get_file_fingerprint(object2) + NA, + get_file_fingerprint(object), + get_file_fingerprint(object2) ), stringsAsFactors = FALSE ) |> n2k_manifest() hash <- store_manifest_yaml( - x = manif, base = base, project = project, docker = "inbobmk/rn2k:dev-0.10", + x = manif, + base = base, + project = project, + docker = "inbobmk/rn2k:dev-0.10", dependencies = c("inbo/n2khelper@v0.5.0", "inbo/n2kanalysis@0.4.0") ) script <- manifest_yaml_to_bash( - base = base, project = project, hash = basename(hash) + base = base, + project = project, + hash = basename(hash) ) results <- get_result( - x = manif, base = base, project = project, verbose = FALSE + x = manif, + base = base, + project = project, + verbose = FALSE ) expect_s4_class(results, "n2kResult") expect_identical( 
@@ -63,7 +89,10 @@ test_that("it handles a manifest", { expect_null(fit_model(y, base = base, project = project, verbose = FALSE)) expect_null(fit_model(y, verbose = FALSE)) results <- get_result( - x = manif, base = base, project = project, verbose = FALSE + x = manif, + base = base, + project = project, + verbose = FALSE ) expect_s4_class(results, "n2kResult") expect_identical( @@ -73,7 +102,10 @@ test_that("it handles a manifest", { expect_true(all(status(results) == "converged")) expect_s4_class( results <- get_result( - x = manif, base = base, project = project, verbose = FALSE + x = manif, + base = base, + project = project, + verbose = FALSE ), "n2kResult" ) @@ -86,7 +118,6 @@ test_that("it handles a manifest", { ) |> file.remove() - # works with an S3 bucket skip_if(Sys.getenv("AWS_SECRET_ACCESS_KEY") == "", message = "No AWS access") aws_base <- get_bucket(Sys.getenv("N2KBUCKET"), max = 1) @@ -95,25 +126,35 @@ test_that("it handles a manifest", { store_model(object3, base = aws_base, project = project) manif <- data.frame( fingerprint = c( - get_file_fingerprint(object), get_file_fingerprint(object2), + get_file_fingerprint(object), + get_file_fingerprint(object2), get_file_fingerprint(object3) ), parent = c( - NA, get_file_fingerprint(object), get_file_fingerprint(object2) + NA, + get_file_fingerprint(object), + get_file_fingerprint(object2) ), stringsAsFactors = FALSE ) |> n2k_manifest() hash <- store_manifest_yaml( - x = manif, base = aws_base, project = project, + x = manif, + base = aws_base, + project = project, docker = "inbobmk/rn2k:dev-0.10", dependencies = c("inbo/n2khelper@v0.5.0", "inbo/n2kanalysis@0.4.0") ) script <- manifest_yaml_to_bash( - base = aws_base, project = project, hash = basename(hash) + base = aws_base, + project = project, + hash = basename(hash) ) results <- get_result( - x = manif, base = aws_base, project = project, verbose = FALSE + x = manif, + base = aws_base, + project = project, + verbose = FALSE ) expect_s4_class(results, "n2kResult") expect_identical( @@ -125,7 +166,10 @@ test_that("it handles a manifest", { fit_model(manif, base = aws_base, project = project, verbose = FALSE) ) results <- get_result( - x = manif, base = aws_base, project = project, verbose = FALSE + x = manif, + base = aws_base, + project = project, + verbose = FALSE ) expect_s4_class(results, "n2kResult") expect_identical( diff --git a/tests/testthat/test_cba_model_impute.R b/tests/testthat/test_cba_model_impute.R index a9439119..0c46a994 100644 --- a/tests/testthat/test_cba_model_impute.R +++ b/tests/testthat/test_cba_model_impute.R @@ -16,47 +16,69 @@ test_that("model imputation works", { project <- "imputation" imputation <- n2k_inla( - data = dataset, scheme_id = this_scheme_id, + data = dataset, + scheme_id = this_scheme_id, result_datasource_id = this_result_datasource_id, species_group_id = this_species_group_id, - location_group_id = this_location_group_id, model_type = this_model_type, - first_imported_year = this_first_imported_year, imputation_size = 3, - last_imported_year = this_last_imported_year, family = "poisson", - last_analyses_year = this_last_analysed_year, duration = this_duration, + location_group_id = this_location_group_id, + model_type = this_model_type, + first_imported_year = this_first_imported_year, + imputation_size = 3, + last_imported_year = this_last_imported_year, + family = "poisson", + last_analyses_year = this_last_analysed_year, + duration = this_duration, formula = "Count ~ A + f(E, model = \"iid\")", analysis_date = Sys.time() ) 
aggregation <- n2k_aggregate( scheme_id = this_scheme_id, - result_datasource_id = this_result_datasource_id, formula = "~ A + B", + result_datasource_id = this_result_datasource_id, + formula = "~ A + B", species_group_id = this_species_group_id, - location_group_id = this_location_group_id, model_type = this_model_type, - first_imported_year = this_first_imported_year, analysis_date = Sys.time(), - last_imported_year = this_last_imported_year, fun = sum, - last_analyses_year = this_last_analysed_year, duration = this_duration, + location_group_id = this_location_group_id, + model_type = this_model_type, + first_imported_year = this_first_imported_year, + analysis_date = Sys.time(), + last_imported_year = this_last_imported_year, + fun = sum, + last_analyses_year = this_last_analysed_year, + duration = this_duration, parent = get_file_fingerprint(imputation) ) aggregation2 <- n2k_aggregate( scheme_id = this_scheme_id, - result_datasource_id = this_result_datasource_id, formula = "~ A", + result_datasource_id = this_result_datasource_id, + formula = "~ A", species_group_id = this_species_group_id, - location_group_id = this_location_group_id, model_type = this_model_type, - first_imported_year = this_first_imported_year, analysis_date = Sys.time(), - last_imported_year = this_last_imported_year, fun = sum, - last_analyses_year = this_last_analysed_year, duration = this_duration, + location_group_id = this_location_group_id, + model_type = this_model_type, + first_imported_year = this_first_imported_year, + analysis_date = Sys.time(), + last_imported_year = this_last_imported_year, + fun = sum, + last_analyses_year = this_last_analysed_year, + duration = this_duration, parent = get_file_fingerprint(aggregation) ) extractor <- function(model) { model$summary.fixed[, c("mean", "sd")] } mi <- n2k_model_imputed( - scheme_id = this_scheme_id, model_args = list(family = "poisson"), - result_datasource_id = this_result_datasource_id, model_fun = INLA::inla, - species_group_id = this_species_group_id, extractor = extractor, - location_group_id = this_location_group_id, model_type = this_model_type, - first_imported_year = this_first_imported_year, analysis_date = Sys.time(), - last_imported_year = this_last_imported_year, formula = "~ A", - last_analyses_year = this_last_analysed_year, duration = this_duration, + scheme_id = this_scheme_id, + model_args = list(family = "poisson"), + result_datasource_id = this_result_datasource_id, + model_fun = INLA::inla, + species_group_id = this_species_group_id, + extractor = extractor, + location_group_id = this_location_group_id, + model_type = this_model_type, + first_imported_year = this_first_imported_year, + analysis_date = Sys.time(), + last_imported_year = this_last_imported_year, + formula = "~ A", + last_analyses_year = this_last_analysed_year, + duration = this_duration, parent = get_file_fingerprint(aggregation) ) pma <- list( @@ -65,14 +87,22 @@ test_that("model imputation works", { } ) mi2 <- n2k_model_imputed( - scheme_id = this_scheme_id, model_args = list(), - result_datasource_id = this_result_datasource_id, model_fun = INLA::inla, - species_group_id = this_species_group_id, extractor = extractor, - location_group_id = this_location_group_id, model_type = this_model_type, - first_imported_year = this_first_imported_year, analysis_date = Sys.time(), - last_imported_year = this_last_imported_year, formula = "~ A", - last_analyses_year = this_last_analysed_year, duration = this_duration, - parent = get_file_fingerprint(aggregation), 
prepare_model_args = pma + scheme_id = this_scheme_id, + model_args = list(), + result_datasource_id = this_result_datasource_id, + model_fun = INLA::inla, + species_group_id = this_species_group_id, + extractor = extractor, + location_group_id = this_location_group_id, + model_type = this_model_type, + first_imported_year = this_first_imported_year, + analysis_date = Sys.time(), + last_imported_year = this_last_imported_year, + formula = "~ A", + last_analyses_year = this_last_analysed_year, + duration = this_duration, + parent = get_file_fingerprint(aggregation), + prepare_model_args = pma ) store_model(imputation, base, project) store_model(aggregation, base, project) @@ -82,22 +112,28 @@ test_that("model imputation works", { expect_message( suppressWarnings({ fit_model( - get_file_fingerprint(imputation), base, project, + get_file_fingerprint(imputation), + base, + project, parallel_configs = FALSE ) }), "converged" ) expect_message( - fit_model(get_file_fingerprint(aggregation), base, project), "converged" + fit_model(get_file_fingerprint(aggregation), base, project), + "converged" ) expect_message( - fit_model(get_file_fingerprint(mi), base, project), "converged" + fit_model(get_file_fingerprint(mi), base, project), + "converged" ) expect_message( - fit_model(get_file_fingerprint(aggregation2), base, project), "converged" + fit_model(get_file_fingerprint(aggregation2), base, project), + "converged" ) expect_message( - fit_model(get_file_fingerprint(mi2), base, project), "converged" + fit_model(get_file_fingerprint(mi2), base, project), + "converged" ) }) diff --git a/tests/testthat/test_daa_get_model_parameter.R b/tests/testthat/test_daa_get_model_parameter.R index 3aff6242..8af049c8 100644 --- a/tests/testthat/test_daa_get_model_parameter.R +++ b/tests/testthat/test_daa_get_model_parameter.R @@ -1,6 +1,7 @@ test_that( - "n2kInla with categorical and numeric fixed effect without random - effect", + paste( + "n2kInla with categorical and numeric fixed effect without random effect" + ), { dataset <- test_data() this_analysis_date <- as.POSIXct("2015-01-01 04:05:06.12", tz = "UTC") @@ -16,16 +17,21 @@ test_that( this_parent <- "abcdef" this_duration <- this_last_imported_year - this_first_imported_year + 1 analysis <- n2k_inla( - data = dataset, formula = "Count ~ A + C", + data = dataset, + formula = "Count ~ A + C", result_datasource_id = this_result_datasource_id, - scheme_id = this_scheme_id, species_group_id = this_species_group_id, - location_group_id = this_location_group_id, family = "nbinomial", + scheme_id = this_scheme_id, + species_group_id = this_species_group_id, + location_group_id = this_location_group_id, + family = "nbinomial", model_type = this_model_type, first_imported_year = this_first_imported_year, last_imported_year = this_last_imported_year, last_analysed_year = this_last_analysed_year, - analysis_date = this_analysis_date, seed = this_seed, - parent = this_parent, duration = this_duration + analysis_date = this_analysis_date, + seed = this_seed, + parent = this_parent, + duration = this_duration ) expect_is(param <- get_model_parameter(analysis), "n2kParameter") expect_identical(nrow(param@Parameter), 0L) @@ -40,7 +46,8 @@ test_that( expect_identical( param@Parameter %>% semi_join( - tibble(description = "Random effect BLUP"), by = "description" + tibble(description = "Random effect BLUP"), + by = "description" ) %>% inner_join(param@Parameter, by = c("fingerprint" = "parent")) %>% nrow(), @@ -49,7 +56,8 @@ test_that( expect_identical( param@Parameter %>% 
semi_join( - tibble(description = "Random effect variance"), by = "description" + tibble(description = "Random effect variance"), + by = "description" ) %>% inner_join(param@Parameter, by = c("fingerprint" = "parent")) %>% nrow(), @@ -70,8 +78,10 @@ test_that( ) test_that( - "n2kInla with single random effect, categorical-categorical - interaction and categorical numeric interaction", + paste( + "n2kInla with single random effect, categorical-categorical interaction", + "and categorical numeric interaction" + ), { dataset <- test_data() this_analysis_date <- as.POSIXct("2015-01-01 04:05:06.12", tz = "UTC") @@ -89,14 +99,19 @@ test_that( analysis <- n2k_inla( formula = "Count ~ 0 + A * C + A * B + f(E, model = \"iid\")", result_datasource_id = this_result_datasource_id, - scheme_id = this_scheme_id, species_group_id = this_species_group_id, - location_group_id = this_location_group_id, family = "nbinomial", + scheme_id = this_scheme_id, + species_group_id = this_species_group_id, + location_group_id = this_location_group_id, + family = "nbinomial", model_type = this_model_type, first_imported_year = this_first_imported_year, last_imported_year = this_last_imported_year, last_analysed_year = this_last_analysed_year, - analysis_date = this_analysis_date, seed = this_seed, data = dataset, - parent = this_parent, duration = this_duration + analysis_date = this_analysis_date, + seed = this_seed, + data = dataset, + parent = this_parent, + duration = this_duration ) analysis <- fit_model(analysis) expect_message( @@ -107,7 +122,8 @@ test_that( expect_identical( param@Parameter %>% semi_join( - tibble(description = "Random effect variance"), by = "description" + tibble(description = "Random effect variance"), + by = "description" ) %>% inner_join(param@Parameter, by = c("fingerprint" = "parent")) %>% nrow(), @@ -136,7 +152,8 @@ test_that( semi_join( param@Parameter %>% semi_join( - tibble(description = "Random effect BLUP"), by = "description" + tibble(description = "Random effect BLUP"), + by = "description" ), by = c("parent" = "fingerprint") ) %>% @@ -192,17 +209,23 @@ test_that( this_parent <- "abcdef" this_duration <- this_last_imported_year - this_first_imported_year + 1 analysis <- n2k_inla( - formula = - "Count ~ C * D + f(E, model = \"rw1\", replicate = as.integer(A))", + formula = paste( + "Count ~ C * D + f(E, model = \"rw1\", replicate = as.integer(A))" + ), result_datasource_id = this_result_datasource_id, - scheme_id = this_scheme_id, species_group_id = this_species_group_id, - location_group_id = this_location_group_id, family = "nbinomial", + scheme_id = this_scheme_id, + species_group_id = this_species_group_id, + location_group_id = this_location_group_id, + family = "nbinomial", model_type = this_model_type, first_imported_year = this_first_imported_year, last_imported_year = this_last_imported_year, last_analysed_year = this_last_analysed_year, - analysis_date = this_analysis_date, seed = this_seed, data = dataset, - parent = this_parent, duration = this_duration + analysis_date = this_analysis_date, + seed = this_seed, + data = dataset, + parent = this_parent, + duration = this_duration ) analysis <- fit_model(analysis) expect_message( @@ -213,7 +236,8 @@ test_that( expect_identical( param@Parameter %>% semi_join( - tibble(description = "Random effect variance"), by = "description" + tibble(description = "Random effect variance"), + by = "description" ) %>% inner_join(param@Parameter, by = c("fingerprint" = "parent")) %>% nrow(), @@ -243,7 +267,8 @@ test_that( semi_join( 
param@Parameter %>% semi_join( - tibble(description = "Random effect BLUP"), by = "description" + tibble(description = "Random effect BLUP"), + by = "description" ), by = c("parent" = "fingerprint") ) %>% @@ -263,10 +288,14 @@ test_that( random %>% group_by(.data$main, .data$level) %>% summarise( - n = n(), missing = mean(is.na(.data$level2)), .groups = "drop_last" + n = n(), + missing = mean(is.na(.data$level2)), + .groups = "drop_last" ) %>% summarise( - n1 = n(), n2 = mean(.data$n), missing = mean(.data$missing), + n1 = n(), + n2 = mean(.data$n), + missing = mean(.data$missing), .groups = "drop" ), tibble(main = "E", n1 = 3L, n2 = 10, missing = 0) @@ -277,7 +306,9 @@ test_that( random %>% mutate( finger2 = ifelse( - is.na(.data$finger2), .data$finger, .data$finger2 + is.na(.data$finger2), + .data$finger, + .data$finger2 ) ), by = c("parameter" = "finger2") @@ -288,79 +319,94 @@ test_that( } ) -test_that( - "imputation and aggregation", - { - set.seed(20191213) - this_result_datasource_id <- sha1(letters) - this_scheme_id <- sha1(letters) - this_species_group_id <- sha1(letters) - this_location_group_id <- sha1(letters) - this_analysis_date <- Sys.time() - this_model_type <- "inla poisson: A * (B + C) + C:D" - this_first_imported_year <- 1990L - this_last_imported_year <- 2015L - this_last_analysed_year <- 2014L - this_duration <- 1L - dataset <- test_data(missing = 0.2) - base <- tempfile("imputation") - dir.create(base) - project <- "imputation" - - imputation <- n2k_inla( - data = dataset, scheme_id = this_scheme_id, - result_datasource_id = this_result_datasource_id, - species_group_id = this_species_group_id, - location_group_id = this_location_group_id, model_type = this_model_type, - first_imported_year = this_first_imported_year, imputation_size = 3, - last_imported_year = this_last_imported_year, family = "poisson", - last_analyses_year = this_last_analysed_year, duration = this_duration, - formula = "Count ~ A + f(E, model = \"iid\")", analysis_date = Sys.time(), - ) - aggregation <- n2k_aggregate( - scheme_id = this_scheme_id, - result_datasource_id = this_result_datasource_id, formula = "~ A + B", - species_group_id = this_species_group_id, - location_group_id = this_location_group_id, model_type = this_model_type, - first_imported_year = this_first_imported_year, - last_imported_year = this_last_imported_year, fun = sum, - last_analyses_year = this_last_analysed_year, duration = this_duration, - parent = get_file_fingerprint(imputation), analysis_date = Sys.time() - ) - expect_is(result <- get_model_parameter(imputation), "n2kParameter") - expect_equal(nrow(result@Parameter), 0L) - expect_is(result <- get_model_parameter(aggregation), "n2kParameter") - expect_equal(nrow(result@Parameter), 0L) +test_that("imputation and aggregation", { + set.seed(20191213) + this_result_datasource_id <- sha1(letters) + this_scheme_id <- sha1(letters) + this_species_group_id <- sha1(letters) + this_location_group_id <- sha1(letters) + this_analysis_date <- Sys.time() + this_model_type <- "inla poisson: A * (B + C) + C:D" + this_first_imported_year <- 1990L + this_last_imported_year <- 2015L + this_last_analysed_year <- 2014L + this_duration <- 1L + dataset <- test_data(missing = 0.2) + base <- tempfile("imputation") + dir.create(base) + project <- "imputation" - suppressWarnings({ - imputation <- fit_model(imputation, parallel_configs = FALSE) - }) - store_model(imputation, base = base, project = project) - aggregation <- fit_model(aggregation, base = base, project = project) - 
expect_is(result <- get_model_parameter(imputation), "n2kParameter") - expect_equal(nrow(result@Parameter), 1956L) - expect_is(result <- get_model_parameter(aggregation), "n2kParameter") - expect_equal(nrow(result@Parameter), 14L) + imputation <- n2k_inla( + data = dataset, + scheme_id = this_scheme_id, + result_datasource_id = this_result_datasource_id, + species_group_id = this_species_group_id, + location_group_id = this_location_group_id, + model_type = this_model_type, + first_imported_year = this_first_imported_year, + imputation_size = 3, + last_imported_year = this_last_imported_year, + family = "poisson", + last_analyses_year = this_last_analysed_year, + duration = this_duration, + formula = "Count ~ A + f(E, model = \"iid\")", + analysis_date = Sys.time(), + ) + aggregation <- n2k_aggregate( + scheme_id = this_scheme_id, + result_datasource_id = this_result_datasource_id, + formula = "~ A + B", + species_group_id = this_species_group_id, + location_group_id = this_location_group_id, + model_type = this_model_type, + first_imported_year = this_first_imported_year, + last_imported_year = this_last_imported_year, + fun = sum, + last_analyses_year = this_last_analysed_year, + duration = this_duration, + parent = get_file_fingerprint(imputation), + analysis_date = Sys.time() + ) + expect_is(result <- get_model_parameter(imputation), "n2kParameter") + expect_equal(nrow(result@Parameter), 0L) + expect_is(result <- get_model_parameter(aggregation), "n2kParameter") + expect_equal(nrow(result@Parameter), 0L) - store_model(aggregation, base = base, project = project) - extractor <- function(model) { - model$summary.fixed[, c("mean", "sd")] - } - mi <- n2k_model_imputed( - scheme_id = this_scheme_id, model_args = list(family = "poisson"), - result_datasource_id = this_result_datasource_id, model_fun = INLA::inla, - species_group_id = this_species_group_id, extractor = extractor, - location_group_id = this_location_group_id, model_type = this_model_type, - first_imported_year = this_first_imported_year, - analysis_date = Sys.time(), last_imported_year = this_last_imported_year, - formula = "~ A", last_analyses_year = this_last_analysed_year, - duration = this_duration, parent = get_file_fingerprint(aggregation) - ) - expect_is(result <- get_model_parameter(mi), "n2kParameter") - expect_equal(nrow(result@Parameter), 0L) + suppressWarnings({ + imputation <- fit_model(imputation, parallel_configs = FALSE) + }) + store_model(imputation, base = base, project = project) + aggregation <- fit_model(aggregation, base = base, project = project) + expect_is(result <- get_model_parameter(imputation), "n2kParameter") + expect_equal(nrow(result@Parameter), 1956L) + expect_is(result <- get_model_parameter(aggregation), "n2kParameter") + expect_equal(nrow(result@Parameter), 14L) - mi <- fit_model(mi, base = base, project = project) - expect_is(result <- get_model_parameter(mi), "n2kParameter") - expect_equal(nrow(result@Parameter), 4L) + store_model(aggregation, base = base, project = project) + extractor <- function(model) { + model$summary.fixed[, c("mean", "sd")] } -) + mi <- n2k_model_imputed( + scheme_id = this_scheme_id, + model_args = list(family = "poisson"), + result_datasource_id = this_result_datasource_id, + model_fun = INLA::inla, + species_group_id = this_species_group_id, + extractor = extractor, + location_group_id = this_location_group_id, + model_type = this_model_type, + first_imported_year = this_first_imported_year, + analysis_date = Sys.time(), + last_imported_year = 
this_last_imported_year, + formula = "~ A", + last_analyses_year = this_last_analysed_year, + duration = this_duration, + parent = get_file_fingerprint(aggregation) + ) + expect_is(result <- get_model_parameter(mi), "n2kParameter") + expect_equal(nrow(result@Parameter), 0L) + + mi <- fit_model(mi, base = base, project = project) + expect_is(result <- get_model_parameter(mi), "n2kParameter") + expect_equal(nrow(result@Parameter), 4L) +}) diff --git a/tests/testthat/test_daa_get_status_fingerprint.R b/tests/testthat/test_daa_get_status_fingerprint.R index fa4fd1a3..3de49f3c 100644 --- a/tests/testthat/test_daa_get_status_fingerprint.R +++ b/tests/testthat/test_daa_get_status_fingerprint.R @@ -1,5 +1,4 @@ test_that("status fingerprint for n2k_inla", { - dataset <- test_data() this_analysis_date <- as.POSIXct("2015-01-01 12:13:14", tz = "UTC") this_result_datasource_id <- sha1(sample(letters)) @@ -17,21 +16,32 @@ test_that("status fingerprint for n2k_inla", { object <- n2k_inla( result_datasource_id = this_result_datasource_id, - scheme_id = this_scheme_id, species_group_id = this_species_group_id, - location_group_id = this_location_group_id, model_type = this_model_type, - formula = this_formula, first_imported_year = this_first_imported_year, + scheme_id = this_scheme_id, + species_group_id = this_species_group_id, + location_group_id = this_location_group_id, + model_type = this_model_type, + formula = this_formula, + first_imported_year = this_first_imported_year, last_imported_year = this_last_imported_year, last_analysed_year = this_last_analysed_year, - analysis_date = this_analysis_date, seed = this_seed, data = dataset, - parent = this_parent, duration = this_duration + analysis_date = this_analysis_date, + seed = this_seed, + data = dataset, + parent = this_parent, + duration = this_duration ) version <- get_analysis_version(sessionInfo()) status_fingerprint <- sha1( list( - get_file_fingerprint(object), status(object), NULL, - version@AnalysisVersion$fingerprint, version@AnalysisVersion, - version@RPackage, version@AnalysisVersionRPackage, - object@AnalysisRelation, NULL + get_file_fingerprint(object), + status(object), + NULL, + version@AnalysisVersion$fingerprint, + version@AnalysisVersion, + version@RPackage, + version@AnalysisVersionRPackage, + object@AnalysisRelation, + NULL ), digits = 6L ) diff --git a/tests/testthat/test_eaa_get_result.R b/tests/testthat/test_eaa_get_result.R index b5989092..6490bb72 100644 --- a/tests/testthat/test_eaa_get_result.R +++ b/tests/testthat/test_eaa_get_result.R @@ -21,15 +21,21 @@ test_that("get_result on n2kInla", { f(G, model = 'iid')" analysis <- n2k_inla( result_datasource_id = this_result_datasource_id, - scheme_id = this_scheme_id, species_group_id = this_species_group_id, - location_group_id = this_location_group_id, family = "nbinomial", - model_type = this_model_type, formula = this_formula, + scheme_id = this_scheme_id, + species_group_id = this_species_group_id, + location_group_id = this_location_group_id, + family = "nbinomial", + model_type = this_model_type, + formula = this_formula, first_imported_year = this_first_imported_year, last_imported_year = this_last_imported_year, - analysis_date = this_analysis_date, data = dataset + analysis_date = this_analysis_date, + data = dataset ) result <- get_result( - analysis, datasource_id = this_datasource, verbose = FALSE + analysis, + datasource_id = this_datasource, + verbose = FALSE ) expect_is(result, "n2kResult") expect_identical(nrow(result@Parameter), 0L) @@ -40,8 +46,10 @@ 
test_that("get_result on n2kInla", { expect_equal( basename(filename) |> get_result( - base = temp_dir, project = "get_result", - datasource_id = this_datasource, verbose = FALSE + base = temp_dir, + project = "get_result", + datasource_id = this_datasource, + verbose = FALSE ), result ) @@ -56,8 +64,10 @@ test_that("get_result on n2kInla", { expect_equal( basename(filename) |> get_result( - base = temp_dir, project = "get_result", - datasource_id = this_datasource, verbose = FALSE + base = temp_dir, + project = "get_result", + datasource_id = this_datasource, + verbose = FALSE ), result ) @@ -67,21 +77,28 @@ test_that("get_result on n2kInla", { filter(.data$C == max(.data$C), .data$D == max(.data$D)) |> select("A", "B", "C", "D") |> distinct() |> - model.matrix(object = ~A * (B + C) + C:D) -> lin_comb + model.matrix(object = ~ A * (B + C) + C:D) -> lin_comb rownames(lin_comb) <- seq_len(nrow(lin_comb)) this_parent <- "abcd" analysis <- n2k_inla( result_datasource_id = this_result_datasource_id, - scheme_id = this_scheme_id, species_group_id = this_species_group_id, - location_group_id = this_location_group_id, family = "nbinomial", - model_type = this_model_type, formula = this_formula, + scheme_id = this_scheme_id, + species_group_id = this_species_group_id, + location_group_id = this_location_group_id, + family = "nbinomial", + model_type = this_model_type, + formula = this_formula, first_imported_year = this_first_imported_year, last_imported_year = this_last_imported_year, - analysis_date = this_analysis_date, data = dataset, lin_comb = lin_comb, + analysis_date = this_analysis_date, + data = dataset, + lin_comb = lin_comb, parent = this_parent ) result2 <- get_result( - analysis, datasource_id = this_datasource, verbose = FALSE + analysis, + datasource_id = this_datasource, + verbose = FALSE ) expect_is(result2, "n2kResult") expect_identical(nrow(result2@Parameter), 0L) @@ -94,8 +111,10 @@ test_that("get_result on n2kInla", { expect_equal( basename(filename) |> get_result( - base = temp_dir, project = "get_result", - datasource_id = this_datasource, verbose = FALSE + base = temp_dir, + project = "get_result", + datasource_id = this_datasource, + verbose = FALSE ), result2 ) @@ -112,8 +131,10 @@ test_that("get_result on n2kInla", { expect_equal( basename(filename) |> get_result( - base = temp_dir, project = "get_result", - datasource_id = this_datasource, verbose = FALSE + base = temp_dir, + project = "get_result", + datasource_id = this_datasource, + verbose = FALSE ), result2 ) @@ -123,16 +144,23 @@ test_that("get_result on n2kInla", { names(lin_comb[[1]]) <- seq_along(lin_comb[[1]]) analysis <- n2k_inla( result_datasource_id = this_result_datasource_id, - scheme_id = this_scheme_id, species_group_id = this_species_group_id, - location_group_id = this_location_group_id, family = "nbinomial", - model_type = this_model_type, formula = this_formula, + scheme_id = this_scheme_id, + species_group_id = this_species_group_id, + location_group_id = this_location_group_id, + family = "nbinomial", + model_type = this_model_type, + formula = this_formula, first_imported_year = this_first_imported_year, last_imported_year = this_last_imported_year, - analysis_date = this_analysis_date, data = dataset, lin_comb = lin_comb, + analysis_date = this_analysis_date, + data = dataset, + lin_comb = lin_comb, parent = this_parent ) result3 <- get_result( - analysis, datasource_id = this_datasource, verbose = FALSE + analysis, + datasource_id = this_datasource, + verbose = FALSE ) expect_is(result3, 
"n2kResult") expect_identical(nrow(result3@Parameter), 0L) @@ -144,15 +172,19 @@ test_that("get_result on n2kInla", { expect_equal( basename(filename) |> get_result( - base = temp_dir, project = "get_result", - datasource_id = this_datasource, verbose = FALSE + base = temp_dir, + project = "get_result", + datasource_id = this_datasource, + verbose = FALSE ), result3 ) fit_model(filename, verbose = FALSE) filename <- gsub(pattern = "new", replacement = "converged", filename) result3 <- get_result( - readRDS(filename), datasource_id = this_datasource, verbose = FALSE + readRDS(filename), + datasource_id = this_datasource, + verbose = FALSE ) expect_is(result3, "n2kResult") expect_lt(0, nrow(result3@Parameter)) @@ -163,13 +195,14 @@ test_that("get_result on n2kInla", { expect_equal( basename(filename) |> get_result( - base = temp_dir, project = "get_result", - datasource_id = this_datasource, verbose = FALSE + base = temp_dir, + project = "get_result", + datasource_id = this_datasource, + verbose = FALSE ), result3 ) - # with linear combination as list of matrices lc_e <- max(dataset$E) |> diag() |> @@ -191,17 +224,24 @@ test_that("get_result on n2kInla", { ) -> lin_comb analysis <- n2k_inla( result_datasource_id = this_result_datasource_id, - scheme_id = this_scheme_id, species_group_id = this_species_group_id, - location_group_id = this_location_group_id, model_type = this_model_type, - family = "nbinomial", formula = this_formula, + scheme_id = this_scheme_id, + species_group_id = this_species_group_id, + location_group_id = this_location_group_id, + model_type = this_model_type, + family = "nbinomial", + formula = this_formula, first_imported_year = this_first_imported_year, last_imported_year = this_last_imported_year, - analysis_date = this_analysis_date, data = dataset, lin_comb = lin_comb, + analysis_date = this_analysis_date, + data = dataset, + lin_comb = lin_comb, replicate_name = list(E = levels(dataset$A)), parent = this_parent ) result4 <- get_result( - analysis, datasource_id = this_datasource, verbose = FALSE + analysis, + datasource_id = this_datasource, + verbose = FALSE ) expect_is(result4, "n2kResult") expect_identical(nrow(result4@Parameter), 0L) @@ -213,8 +253,10 @@ test_that("get_result on n2kInla", { expect_equal( basename(filename) |> get_result( - base = temp_dir, project = "get_result", - datasource_id = this_datasource, verbose = FALSE + base = temp_dir, + project = "get_result", + datasource_id = this_datasource, + verbose = FALSE ), result4 ) @@ -231,8 +273,10 @@ test_that("get_result on n2kInla", { expect_equal( basename(filename) |> get_result( - base = temp_dir, project = "get_result", - datasource_id = this_datasource, verbose = FALSE + base = temp_dir, + project = "get_result", + datasource_id = this_datasource, + verbose = FALSE ), result4 ) @@ -244,8 +288,10 @@ test_that("get_result on n2kInla", { expect_identical( get_file_fingerprint(combined_result), sort(c( - get_file_fingerprint(result), get_file_fingerprint(result2), - get_file_fingerprint(result3), get_file_fingerprint(result4) + get_file_fingerprint(result), + get_file_fingerprint(result2), + get_file_fingerprint(result3), + get_file_fingerprint(result4) )) ) # clean temp files From 9fff811284674fa7139cce896aafd371da6983a4 Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Thu, 18 Sep 2025 15:14:22 +0200 Subject: [PATCH 14/16] =?UTF-8?q?=F0=9F=93=9D=20Update=20citation?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DESCRIPTION | 3 ++- 
inst/CITATION | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 5ec3bf19..1163e4ae 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -4,7 +4,8 @@ Version: 0.4.1 Authors@R: c( person("Thierry", "Onkelinx", , "thierry.onkelinx@inbo.be", role = c("aut", "cre"), comment = c(ORCID = "0000-0001-8804-4216", affiliation = "Research Institute for Nature and Forest (INBO)")), - person("Research Institute for Nature and Forest (INBO)", , , "info@inbo.be", role = c("cph", "fnd")) + person("Research Institute for Nature and Forest (INBO)", , , "info@inbo.be", role = c("cph", "fnd"), + comment = c(ROR = "https://ror.org/00j54wy13")) ) Description: All generic functions and classes for the analysis for the 'Natura 2000' monitoring. The classes contain all required data and diff --git a/inst/CITATION b/inst/CITATION index 646e7e4a..d497f572 100644 --- a/inst/CITATION +++ b/inst/CITATION @@ -3,7 +3,7 @@ citHeader("To cite `n2kanalysis` in publications please use:") bibentry( bibtype = "Manual", title = "n2kanalysis: Generic Functions to Analyse Data from the 'Natura 2000' Monitoring. Version 0.4.1", - author = c( author = c(person(given = "Thierry", family = "Onkelinx"))), + author = c(person(given = "Thierry", family = "Onkelinx")), year = 2025, url = "https://inbo.github.io/n2kanalysis/", abstract = "All generic functions and classes for the analysis for the 'Natura 2000' monitoring. The classes contain all required data and definitions to fit the model without the need to access other sources. Potentially they might need access to one or more parent objects. An aggregation object might for example need the result of an imputation object. The actual definition of the analysis, using these generic function and classes, is defined in dedictated analysis R packages for every monitoring scheme. 
For example 'abvanalysis' and 'watervogelanalysis'.", From c1ecd5ea69627806f4fe33215aed99ef9c5eaa12 Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Mon, 29 Sep 2025 17:46:28 +0200 Subject: [PATCH 15/16] =?UTF-8?q?=E2=9C=A8=20Add=20connect=5Finbo=5Fs3()?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DESCRIPTION | 1 + NAMESPACE | 2 ++ R/connect_inbo_s3.R | 37 +++++++++++++++++++++++++++++++++++++ man/connect_inbo_s3.Rd | 15 +++++++++++++++ 4 files changed, 55 insertions(+) create mode 100644 R/connect_inbo_s3.R create mode 100644 man/connect_inbo_s3.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 1163e4ae..9fc9a60e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -73,6 +73,7 @@ Collate: 'n2k_analysis_metadata_class.R' 'n2k_result_class.R' 'combine_result.R' + 'connect_inbo_s3.R' 'import_s3_classes.R' 'delete_model.R' 'display.R' diff --git a/NAMESPACE b/NAMESPACE index bae9efcc..a352970a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,6 +3,7 @@ S3method(sha1,aggregatedImputed) S3method(sha1,inla) S3method(sha1,rawImputed) +export(connect_inbo_s3) export(display) export(fit_every_model) export(get_datafield_id) @@ -143,6 +144,7 @@ importFrom(fs,file_delete) importFrom(fs,file_exists) importFrom(fs,path) importFrom(fs,path_abs) +importFrom(fs,path_home) importFrom(git2rdata,is_git2rmeta) importFrom(git2rdata,update_metadata) importFrom(git2rdata,verify_vc) diff --git a/R/connect_inbo_s3.R b/R/connect_inbo_s3.R new file mode 100644 index 00000000..b5d21c4c --- /dev/null +++ b/R/connect_inbo_s3.R @@ -0,0 +1,37 @@ +#' Set environment variables for INBO S3 bucket +#' +#' Before running this function you must have an `.aws` folder in your home +#' directory with a `credentials` file containing the credentials for the INBO +#' shared infrastructure. +#' Run the `aws assume role` command to get the credentials for the INBO shared +#' infrastructure before running this function. +#' +#' @export +#' @importFrom fs dir_exists path path_home +#' @importFrom stats setNames +#' @importFrom utils head tail +connect_inbo_s3 <- function() { + c(unix = ".aws", windows = "../.aws")[.Platform$OS.type] |> + path_home() -> aws_dir + stopifnot("no `.aws` folder found" = dir_exists(aws_dir)) + # Read the credentials file + path(aws_dir, "credentials") |> + readLines() -> creds + # keep credentials related to the role + role <- grep("\\[inbo-shared-infra", creds) + stopifnot("no role found" = length(role) == 1) + tail(creds, -role) -> creds + grep("\\[", creds) |> + c(length(creds) + 1) |> + min() -> other + head(creds, other - 1) -> creds + # set environment variables + gsub("(.*) = (.*)", "\\1", creds) |> + toupper() |> + setNames(object = creds) |> + gsub(pattern = "(.*) = (.*)", replacement = "\\2", x = _) |> + c(AWS_DEFAULT_REGION = "eu-west-1") |> + as.list() |> + do.call(what = Sys.setenv) + return(invisible(NULL)) +} diff --git a/man/connect_inbo_s3.Rd b/man/connect_inbo_s3.Rd new file mode 100644 index 00000000..bddd33c6 --- /dev/null +++ b/man/connect_inbo_s3.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/connect_inbo_s3.R +\name{connect_inbo_s3} +\alias{connect_inbo_s3} +\title{Set environment variables for INBO S3 bucket} +\usage{ +connect_inbo_s3() +} +\description{ +Before running this function you must have an \code{.aws} folder in your home +directory with a \code{credentials} file containing the credentials for the INBO +shared infrastructure. 
+Run the \verb{aws assume role} command to get the credentials for the INBO shared +infrastructure before running this function. +} From b48b8f08f3a0dc042982345500b7e88b9ebf0649 Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Thu, 2 Oct 2025 15:38:57 +0200 Subject: [PATCH 16/16] =?UTF-8?q?=F0=9F=91=B7=20Use=20AWS=20role?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/check_on_branch.yml | 9 ++++++--- .github/workflows/check_on_different_r_os.yml | 13 ++++++++++--- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/.github/workflows/check_on_branch.yml b/.github/workflows/check_on_branch.yml index 70812d8e..56e41486 100644 --- a/.github/workflows/check_on_branch.yml +++ b/.github/workflows/check_on_branch.yml @@ -14,10 +14,13 @@ jobs: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} N2KBUCKET: ${{ secrets.N2KBUCKET }} - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} - AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} permissions: + id-token: write contents: read steps: + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + role-to-assume: arn:aws:iam::347082780157:role/inbo-n2kmonitoring-shared-infra-eu-west-1-unittest-role + aws-region: eu-west-1 - uses: inbo/actions/check_pkg@main diff --git a/.github/workflows/check_on_different_r_os.yml b/.github/workflows/check_on_different_r_os.yml index bb243621..0fdce605 100644 --- a/.github/workflows/check_on_different_r_os.yml +++ b/.github/workflows/check_on_different_r_os.yml @@ -30,11 +30,12 @@ jobs: _R_CHECK_SYSTEM_CLOCK_: false RSPM: ${{ matrix.config.rspm }} GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} - AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} N2KBUCKET: ${{ secrets.N2KBUCKET }} + permissions: + id-token: write + contents: read + steps: - uses: actions/checkout@v3 @@ -50,6 +51,12 @@ jobs: extra-packages: any::rcmdcheck needs: check + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + role-to-assume: arn:aws:iam::347082780157:role/inbo-n2kmonitoring-shared-infra-eu-west-1-unittest-role + aws-region: eu-west-1 + - uses: r-lib/actions/check-r-package@v2 with: error-on: '"error"'
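
A note on how the last two patches fit together: PATCH 15 covers interactive use, where connect_inbo_s3() exports the AWS_* variables that `aws assume role` wrote to the [inbo-shared-infra...] profile in ~/.aws/credentials, while PATCH 16 lets the GitHub workflows obtain the unittest role via the configure-aws-credentials action, so the static AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY secrets are no longer needed there. The sketch below shows what a typical local session could look like under those assumptions; it relies on the aws.s3 back end used by the package, and the bucket value, project folder and file fingerprint are placeholders (the workflows read the real bucket name from the N2KBUCKET secret). Extra arguments such as datasource_id are forwarded through ... when the result still has to be extracted from a model object.

library(n2kanalysis)

# Export the AWS_* environment variables from ~/.aws/credentials;
# requires that `aws assume role` was run first (see PATCH 15).
connect_inbo_s3()

# An s3_bucket handle for the shared bucket; listing a single key is enough.
# Locally we reuse the N2KBUCKET variable that the CI workflows also set
# (placeholder value otherwise).
bucket <- aws.s3::get_bucket(Sys.getenv("N2KBUCKET"), max = 1)

# Fetch the result for one analysis: reuse the cached rds under
# "<project>/results/<fingerprint>.rds" when it exists, otherwise extract it
# from the stored model object.
result <- get_result(
  x = "0123456789abcdef0123456789abcdef01234567",  # placeholder file fingerprint
  base = bucket,
  project = "watervogelanalysis",  # placeholder project folder on the bucket
  verbose = TRUE
)

The same call works against a local results tree by passing an existing directory as base instead of an s3_bucket object, which is the code path exercised by the unit tests in PATCH 13.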