Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ Collate:
'utils.R'
'prevalence-survey.R'
'prevalence-simple.R'
'prevalence-hybrid-sw.R'
'assertions.R'
'prevalence.R'
'z-score-helper.R'
Expand Down
5 changes: 5 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,13 +1,18 @@
# Generated by roxygen2: do not edit by hand

S3method(column_names,hybrid_design)
S3method(column_names,simple_design)
S3method(column_names,survey_design)
S3method(column_values,hybrid_design)
S3method(column_values,simple_design)
S3method(column_values,survey_design)
S3method(compute_prevalence_estimates_for_column_by,hybrid_design)
S3method(compute_prevalence_estimates_for_column_by,simple_design)
S3method(compute_prevalence_estimates_for_column_by,survey_design)
S3method(compute_prevalence_sample_size_by,hybrid_design)
S3method(compute_prevalence_sample_size_by,simple_design)
S3method(compute_prevalence_sample_size_by,survey_design)
S3method(compute_prevalence_zscore_summaries_by,hybrid_design)
S3method(compute_prevalence_zscore_summaries_by,simple_design)
S3method(compute_prevalence_zscore_summaries_by,survey_design)
export(anthro_api_compute_prevalence)
Expand Down
6 changes: 4 additions & 2 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,10 @@

## Performance

* For inputs with `cluster/strata = NULL` and `sw = NULL or 1` a faster
method was added for this special case to compute `anthro_prevalence`.
* For inputs with `cluster/strata = NULL` a faster
method was added for this special case to compute `anthro_prevalence`. This
method yields the same results within an observed tolerance of `< 0.0001` on
test data.

## Bugfix

Expand Down
105 changes: 105 additions & 0 deletions R/prevalence-hybrid-sw.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
#' The following is a hybrid backed for cases with a sampling weight > 0.
#' It recreates the estimates for z-score summaries and population estimates,
#' but uses the survey package to compute estimates around the 0/1 indicators.
#'
#' The z-score estimates follow the method uses in the {survey} package by
#' T. Lumley.
#'
#' Reference:
#' T. Lumley (2024) "survey: analysis of complex survey samples".
#' R package version 4.4.
#' @noRd

#' @export
column_names.hybrid_design <- function(x) {
colnames(x$data)
}

#' @export
column_values.hybrid_design <- function(x, col) {
x$data[[col]]
}

#' @export
#' @include prevalence-simple.R
compute_prevalence_zscore_summaries_by.hybrid_design <- function(
data,
indicator,
subset_col_name
) {
zscore_col_name <- prev_zscore_value_column(indicator)
compute_and_aggregate(
data,
zscore_col_name,
subset_col_name,
compute = zscore_estimate_weighted,
empty_data_prototype = data.frame(
r = NA_real_,
se = NA_real_,
ll = NA_real_,
ul = NA_real_,
stdev = NA_real_
)
)
}

#' @export
compute_prevalence_estimates_for_column_by.hybrid_design <- compute_prevalence_estimates_for_column_by.survey_design

#' @export
#' @include prevalence-simple.R
compute_prevalence_sample_size_by.hybrid_design <- function(
data,
indicator,
subset_col_name
) {
column_name <- prev_prevalence_column_name(indicator)
compute_and_aggregate(
data,
column_name,
subset_col_name,
compute = sample_size_weighted,
empty_data_prototype = data.frame(
pop = 0,
unwpop = 0
)
)
}

#' @importFrom stats sd plogis qt
zscore_estimate_weighted <- function(x, N, weights, empty_data_prototype) {
non_na <- !is.na(x)
x <- x[non_na]
if (length(x) == 0) {
return(empty_data_prototype)
}
w <- weights[non_na]
n <- length(x)
sw <- sum(w)
r <- sum(w * x) / sw
w_normalized <- w / sw
stdev <- sqrt(sum(w_normalized * (x - r)^2) * n / (n - 1))
se <- sample_se_weights(x, w, r, sw, N)
t_value <- qt(1 - prevalence_significance_level / 2, N - 1)
cis <- r + se * c(-t_value, t_value)
data.frame(
r = r,
se = se,
ll = cis[1L],
ul = cis[2L],
stdev = stdev
)
}

sample_size_weighted <- function(x, N, weights, empty_data_prototype) {
x <- as.numeric(!is.na(x))
pop <- as.numeric(sum(x * weights, na.rm = TRUE))
n <- as.numeric(sum(x))
data.frame(pop = pop, unwpop = n)
}

sample_se_weights <- function(x, weights, x_mean, n, N) {
scale <- N / (N - 1)
x_deviation <- weights * (x - x_mean)
sqrt(scale) / n * sqrt(sum(x_deviation * x_deviation))
}
22 changes: 17 additions & 5 deletions R/prevalence-simple.R
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,8 @@ compute_and_aggregate <- function(
) {
col_values <- column_values(data, value_column)
N <- length(col_values)
weights <- data$data[["sampling_weights"]]
has_weights <- !is.null(weights)
grouping <- column_values(data, subset_column)
groups <- if (is.factor(grouping)) {
levels(grouping)
Expand All @@ -103,11 +105,21 @@ compute_and_aggregate <- function(
values <- vector(mode = "list", length(groups))
names(values) <- groups
for (group in groups) {
values[[group]] <- compute(
col_values[grouping == group],
N = N,
empty_data_prototype = empty_data_prototype
)
values[[group]] <- if (has_weights) {
subroup <- grouping == group
compute(
col_values[subroup],
N = N,
weights = weights[subroup],
empty_data_prototype = empty_data_prototype
)
} else {
compute(
col_values[grouping == group],
N = N,
empty_data_prototype = empty_data_prototype
)
}
}
Group <- names(values)
data <- do.call(rbind, values)
Expand Down
31 changes: 31 additions & 0 deletions R/prevalence.R
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,7 @@
#' @include anthro-package.R
#' @include assertions.R
#' @include prevalence-simple.R
#' @include prevalence-hybrid-sw.R
#' @include prevalence-survey.R
#' @importFrom stats confint
#' @importFrom stats as.formula
Expand Down Expand Up @@ -562,6 +563,12 @@ generate_cutoffs <- function(data_column) {
})
}

#' build_design constructs a custom design object that is used to compute
#' all indicators.
#' The function decides if the survey package is used for all indicator
#' or if a specialized method can be used to compute the results faster. For
#' example when there are no cluster/strata and sampling weights.
#' @noRd
build_design <- function(survey_data) {
# we can override the choice here using internal options.
# these options are not part of the public api and can change at any time.
Expand All @@ -575,6 +582,11 @@ build_design <- function(survey_data) {
if (is_simple_problem) {
return(build_simple_design(survey_data))
}
is_simple_problem_with_sw <- is.null(survey_data[["cluster"]]) &&
is.null(survey_data[["strata"]])
if (is_simple_problem_with_sw) {
return(build_hybrid_design(survey_data))
}
build_survey_design(survey_data)
}

Expand Down Expand Up @@ -910,6 +922,25 @@ build_simple_design <- function(survey_data) {
)
}

build_hybrid_design <- function(survey_data) {
# for cases with sw > 0 but cluster/strata = NULL, we just need
# a single survey design object
design <- svydesign(
ids = ~1,
strata = NULL,
weights = ~sampling_weights,
data = survey_data,
nest = TRUE
)
structure(
list(
data = survey_data,
design = design
),
class = "hybrid_design"
)
}

#' @importFrom survey svydesign
build_survey_design <- function(survey_data) {
has_cluster_info <- !is.null(survey_data[["cluster"]])
Expand Down
20 changes: 10 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,16 +55,16 @@ anthro_zscores(
weight = c(18, 15, 10, 15),
lenhei = c(120, 80, 100, 100)
)
#> clenhei cbmi cmeasure csex zlen flen zwei fwei zwfl fwfl zbmi fbmi zhc
#> 1 120 12.5000 <NA> 1 7.31 1 2.20 0 -2.39 0 -3.01 0 NA
#> 2 80 23.4375 <NA> 2 -3.50 0 0.95 0 4.13 0 4.66 0 NA
#> 3 100 10.0000 <NA> 1 1.62 0 -2.76 0 -5.19 1 -5.61 1 NA
#> 4 100 15.0000 <NA> 1 1.70 0 0.69 0 -0.29 0 -0.58 0 NA
#> fhc zac fac zts fts zss fss
#> 1 NA NA NA NA NA NA NA
#> 2 NA NA NA NA NA NA NA
#> 3 NA NA NA NA NA NA NA
#> 4 NA NA NA NA NA NA NA
#> clenhei cmeasure c9mo_flag cbmi csex zlen flen zwei fwei zwfl fwfl
#> 1 120 <NA> 0 12.5000 1 7.31 1 2.20 0 -2.39 0
#> 2 80 <NA> 0 23.4375 2 -3.50 0 0.95 0 4.13 0
#> 3 100 <NA> 0 10.0000 1 1.62 0 -2.76 0 -5.19 1
#> 4 100 <NA> 0 15.0000 1 1.70 0 0.69 0 -0.29 0
#> zbmi fbmi zhc fhc zac fac zts fts zss fss
#> 1 -3.01 0 NA NA NA NA NA NA NA NA
#> 2 4.66 0 NA NA NA NA NA NA NA NA
#> 3 -5.61 1 NA NA NA NA NA NA NA NA
#> 4 -0.58 0 NA NA NA NA NA NA NA NA
```

The returned value is a `data.frame` that can further be processed or
Expand Down
Loading