WorldHealthOrganization · rkumapley · Jan 26, 2026 · Dec 15, 2025 · Jan 21, 2026
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -29,6 +29,7 @@ Collate:
     'utils.R'
     'prevalence-survey.R'
     'prevalence-simple.R'
+    'prevalence-hybrid-sw.R'
     'assertions.R'
     'prevalence.R'
     'z-score-helper.R'

diff --git a/NAMESPACE b/NAMESPACE
@@ -1,13 +1,18 @@
 # Generated by roxygen2: do not edit by hand
 
+S3method(column_names,hybrid_design)
 S3method(column_names,simple_design)
 S3method(column_names,survey_design)
+S3method(column_values,hybrid_design)
 S3method(column_values,simple_design)
 S3method(column_values,survey_design)
+S3method(compute_prevalence_estimates_for_column_by,hybrid_design)
 S3method(compute_prevalence_estimates_for_column_by,simple_design)
 S3method(compute_prevalence_estimates_for_column_by,survey_design)
+S3method(compute_prevalence_sample_size_by,hybrid_design)
 S3method(compute_prevalence_sample_size_by,simple_design)
 S3method(compute_prevalence_sample_size_by,survey_design)
+S3method(compute_prevalence_zscore_summaries_by,hybrid_design)
 S3method(compute_prevalence_zscore_summaries_by,simple_design)
 S3method(compute_prevalence_zscore_summaries_by,survey_design)
 export(anthro_api_compute_prevalence)

diff --git a/NEWS.md b/NEWS.md
@@ -14,8 +14,10 @@
 
 ## Performance
 
-* For inputs with `cluster/strata = NULL` and `sw = NULL or 1` a faster
-  method was added for this special case to compute `anthro_prevalence`.
+* For inputs with `cluster/strata = NULL` a faster
+  method was added for this special case to compute `anthro_prevalence`. This
+  method yields the same results within an observed tolerance of `< 0.0001` on
+  test data.
 
 ## Bugfix
 

diff --git a/R/prevalence-hybrid-sw.R b/R/prevalence-hybrid-sw.R
@@ -0,0 +1,105 @@
+#' The following is a hybrid backed for cases with a sampling weight > 0.
+#' It recreates the estimates for z-score summaries and population estimates,
+#' but uses the survey package to compute estimates around the 0/1 indicators.
+#'
+#' The z-score estimates follow the method uses in the {survey} package by
+#' T. Lumley.
+#'
+#' Reference:
+#' T. Lumley (2024) "survey: analysis of complex survey samples".
+#' R package version 4.4.
+#' @noRd
+
+#' @export
+column_names.hybrid_design <- function(x) {
+  colnames(x$data)
+}
+
+#' @export
+column_values.hybrid_design <- function(x, col) {
+  x$data[[col]]
+}
+
+#' @export
+#' @include prevalence-simple.R
+compute_prevalence_zscore_summaries_by.hybrid_design <- function(
+  data,
+  indicator,
+  subset_col_name
+) {
+  zscore_col_name <- prev_zscore_value_column(indicator)
+  compute_and_aggregate(
+    data,
+    zscore_col_name,
+    subset_col_name,
+    compute = zscore_estimate_weighted,
+    empty_data_prototype = data.frame(
+      r = NA_real_,
+      se = NA_real_,
+      ll = NA_real_,
+      ul = NA_real_,
+      stdev = NA_real_
+    )
+  )
+}
+
+#' @export
+compute_prevalence_estimates_for_column_by.hybrid_design <- compute_prevalence_estimates_for_column_by.survey_design
+
+#' @export
+#' @include prevalence-simple.R
+compute_prevalence_sample_size_by.hybrid_design <- function(
+  data,
+  indicator,
+  subset_col_name
+) {
+  column_name <- prev_prevalence_column_name(indicator)
+  compute_and_aggregate(
+    data,
+    column_name,
+    subset_col_name,
+    compute = sample_size_weighted,
+    empty_data_prototype = data.frame(
+      pop = 0,
+      unwpop = 0
+    )
+  )
+}
+
+#' @importFrom stats sd plogis qt
+zscore_estimate_weighted <- function(x, N, weights, empty_data_prototype) {
+  non_na <- !is.na(x)
+  x <- x[non_na]
+  if (length(x) == 0) {
+    return(empty_data_prototype)
+  }
+  w <- weights[non_na]
+  n <- length(x)
+  sw <- sum(w)
+  r <- sum(w * x) / sw
+  w_normalized <- w / sw
+  stdev <- sqrt(sum(w_normalized * (x - r)^2) * n / (n - 1))
+  se <- sample_se_weights(x, w, r, sw, N)
+  t_value <- qt(1 - prevalence_significance_level / 2, N - 1)
+  cis <- r + se * c(-t_value, t_value)
+  data.frame(
+    r = r,
+    se = se,
+    ll = cis[1L],
+    ul = cis[2L],
+    stdev = stdev
+  )
+}
+
+sample_size_weighted <- function(x, N, weights, empty_data_prototype) {
+  x <- as.numeric(!is.na(x))
+  pop <- as.numeric(sum(x * weights, na.rm = TRUE))
+  n <- as.numeric(sum(x))
+  data.frame(pop = pop, unwpop = n)
+}
+
+sample_se_weights <- function(x, weights, x_mean, n, N) {
+  scale <- N / (N - 1)
+  x_deviation <- weights * (x - x_mean)
+  sqrt(scale) / n * sqrt(sum(x_deviation * x_deviation))
+}
diff --git a/R/prevalence-simple.R b/R/prevalence-simple.R
@@ -93,6 +93,8 @@ compute_and_aggregate <- function(
 ) {
   col_values <- column_values(data, value_column)
   N <- length(col_values)
+  weights <- data$data[["sampling_weights"]]
+  has_weights <- !is.null(weights)
   grouping <- column_values(data, subset_column)
   groups <- if (is.factor(grouping)) {
     levels(grouping)
@@ -103,11 +105,21 @@ compute_and_aggregate <- function(
   values <- vector(mode = "list", length(groups))
   names(values) <- groups
   for (group in groups) {
-    values[[group]] <- compute(
-      col_values[grouping == group],
-      N = N,
-      empty_data_prototype = empty_data_prototype
-    )
+    values[[group]] <- if (has_weights) {
+      subroup <- grouping == group
+      compute(
+        col_values[subroup],
+        N = N,
+        weights = weights[subroup],
+        empty_data_prototype = empty_data_prototype
+      )
+    } else {
+      compute(
+        col_values[grouping == group],
+        N = N,
+        empty_data_prototype = empty_data_prototype
+      )
+    }
   }
   Group <- names(values)
   data <- do.call(rbind, values)

diff --git a/R/prevalence.R b/R/prevalence.R
@@ -165,6 +165,7 @@
 #' @include anthro-package.R
 #' @include assertions.R
 #' @include prevalence-simple.R
+#' @include prevalence-hybrid-sw.R
 #' @include prevalence-survey.R
 #' @importFrom stats confint
 #' @importFrom stats as.formula
@@ -562,6 +563,12 @@ generate_cutoffs <- function(data_column) {
   })
 }
 
+#' build_design constructs a custom design object that is used to compute
+#' all indicators.
+#' The function decides if the survey package is used for all indicator
+#' or if a specialized method can be used to compute the results faster. For
+#' example when there are no cluster/strata and sampling weights.
+#' @noRd
 build_design <- function(survey_data) {
   # we can override the choice here using internal options.
   # these options are not part of the public api and can change at any time.
@@ -575,6 +582,11 @@ build_design <- function(survey_data) {
   if (is_simple_problem) {
     return(build_simple_design(survey_data))
   }
+  is_simple_problem_with_sw <- is.null(survey_data[["cluster"]]) &&
+    is.null(survey_data[["strata"]])
+  if (is_simple_problem_with_sw) {
+    return(build_hybrid_design(survey_data))
+  }
   build_survey_design(survey_data)
 }
 
@@ -910,6 +922,25 @@ build_simple_design <- function(survey_data) {
   )
 }
 
+build_hybrid_design <- function(survey_data) {
+  # for cases with sw > 0 but cluster/strata = NULL, we just need
+  # a single survey design object
+  design <- svydesign(
+    ids = ~1,
+    strata = NULL,
+    weights = ~sampling_weights,
+    data = survey_data,
+    nest = TRUE
+  )
+  structure(
+    list(
+      data = survey_data,
+      design = design
+    ),
+    class = "hybrid_design"
+  )
+}
+
 #' @importFrom survey svydesign
 build_survey_design <- function(survey_data) {
   has_cluster_info <- !is.null(survey_data[["cluster"]])

diff --git a/README.md b/README.md
@@ -55,16 +55,16 @@ anthro_zscores(
   weight = c(18, 15, 10, 15),
   lenhei = c(120, 80, 100, 100)
 )
-#>   clenhei    cbmi cmeasure csex  zlen flen  zwei fwei  zwfl fwfl  zbmi fbmi zhc
-#> 1     120 12.5000     <NA>    1  7.31    1  2.20    0 -2.39    0 -3.01    0  NA
-#> 2      80 23.4375     <NA>    2 -3.50    0  0.95    0  4.13    0  4.66    0  NA
-#> 3     100 10.0000     <NA>    1  1.62    0 -2.76    0 -5.19    1 -5.61    1  NA
-#> 4     100 15.0000     <NA>    1  1.70    0  0.69    0 -0.29    0 -0.58    0  NA
-#>   fhc zac fac zts fts zss fss
-#> 1  NA  NA  NA  NA  NA  NA  NA
-#> 2  NA  NA  NA  NA  NA  NA  NA
-#> 3  NA  NA  NA  NA  NA  NA  NA
-#> 4  NA  NA  NA  NA  NA  NA  NA
+#>   clenhei cmeasure c9mo_flag    cbmi csex  zlen flen  zwei fwei  zwfl fwfl
+#> 1     120     <NA>         0 12.5000    1  7.31    1  2.20    0 -2.39    0
+#> 2      80     <NA>         0 23.4375    2 -3.50    0  0.95    0  4.13    0
+#> 3     100     <NA>         0 10.0000    1  1.62    0 -2.76    0 -5.19    1
+#> 4     100     <NA>         0 15.0000    1  1.70    0  0.69    0 -0.29    0
+#>    zbmi fbmi zhc fhc zac fac zts fts zss fss
+#> 1 -3.01    0  NA  NA  NA  NA  NA  NA  NA  NA
+#> 2  4.66    0  NA  NA  NA  NA  NA  NA  NA  NA
+#> 3 -5.61    1  NA  NA  NA  NA  NA  NA  NA  NA
+#> 4 -0.58    0  NA  NA  NA  NA  NA  NA  NA  NA
 ```
 
 The returned value is a `data.frame` that can further be processed or