A `tbl_svy` wraps a locally stored svydesign and adds methods for dplyr single-table verbs like `mutate`, `group_by` and `summarise`. Create a `tbl_svy` using `as_survey_design`.
`tbl_svy` implements these methods from dplyr.
# Attach the packages the examples rely on. srvyr must be attached explicitly:
# as_survey_design() and survey_mean() are srvyr functions and are not
# provided by survey or dplyr.
library(survey)
library(dplyr)
library(srvyr)
# data(api) loads the example API datasets from survey, including apistrat.
data(api)
# Build the survey design object: strata taken from stype, weights from pw.
svy <- as_survey_design(apistrat, strata = stype, weights = pw)
svy
#> Stratified Independent Sampling design (with replacement)
#> Called via srvyr
#> Sampling variables:
#> - ids: `1`
#> - strata: stype
#> - weights: pw
#> Data variables:
#> - cds (chr), stype (fct), name (chr), sname (chr), snum (dbl), dname (chr),
#> dnum (int), cname (chr), cnum (int), flag (int), pcttest (int), api00
#> (int), api99 (int), target (int), growth (int), sch.wide (fct), comp.imp
#> (fct), both (fct), awards (fct), meals (int), ell (int), yr.rnd (fct),
#> mobility (int), acs.k3 (int), acs.46 (int), acs.core (int), pct.resp (int),
#> not.hsg (int), hsg (int), some.col (int), col.grad (int), grad.sch (int),
#> avg.ed (dbl), full (int), emer (int), enroll (int), api.stu (int), pw
#> (dbl), fpc (dbl)
# Data manipulation verbs ---------------------------------------------------
filter(svy, pcttest > 95)
#> Stratified Independent Sampling design (with replacement)
#> Called via srvyr
#> Sampling variables:
#> - ids: `1`
#> - strata: stype
#> - weights: pw
#> Data variables:
#> - cds (chr), stype (fct), name (chr), sname (chr), snum (dbl), dname (chr),
#> dnum (int), cname (chr), cnum (int), flag (int), pcttest (int), api00
#> (int), api99 (int), target (int), growth (int), sch.wide (fct), comp.imp
#> (fct), both (fct), awards (fct), meals (int), ell (int), yr.rnd (fct),
#> mobility (int), acs.k3 (int), acs.46 (int), acs.core (int), pct.resp (int),
#> not.hsg (int), hsg (int), some.col (int), col.grad (int), grad.sch (int),
#> avg.ed (dbl), full (int), emer (int), enroll (int), api.stu (int), pw
#> (dbl), fpc (dbl)
select(svy, starts_with("acs")) # variables used in survey design are automatically kept
#> Stratified Independent Sampling design (with replacement)
#> Called via srvyr
#> Sampling variables:
#> - ids: `1`
#> - strata: stype
#> - weights: pw
#> Data variables:
#> - acs.k3 (int), acs.46 (int), acs.core (int)
summarise(svy, col.grad = survey_mean(col.grad))
#> # A tibble: 1 × 2
#> col.grad col.grad_se
#> <dbl> <dbl>
#> 1 19.9 1.06
mutate(svy, api_diff = api00 - api99)
#> Stratified Independent Sampling design (with replacement)
#> Called via srvyr
#> Sampling variables:
#> - ids: `1`
#> - strata: stype
#> - weights: pw
#> Data variables:
#> - cds (chr), stype (fct), name (chr), sname (chr), snum (dbl), dname (chr),
#> dnum (int), cname (chr), cnum (int), flag (int), pcttest (int), api00
#> (int), api99 (int), target (int), growth (int), sch.wide (fct), comp.imp
#> (fct), both (fct), awards (fct), meals (int), ell (int), yr.rnd (fct),
#> mobility (int), acs.k3 (int), acs.46 (int), acs.core (int), pct.resp (int),
#> not.hsg (int), hsg (int), some.col (int), col.grad (int), grad.sch (int),
#> avg.ed (dbl), full (int), emer (int), enroll (int), api.stu (int), pw
#> (dbl), fpc (dbl), api_diff (int)
# Group by operations -------------------------------------------------------
# To calculate survey estimates by group, group the design with group_by()
# before summarising.
# NOTE(review): svy_group is created here but never used. The summarise()
# call below is passed the ungrouped `svy`, which is why the printed result
# is a single overall row. If per-dname estimates were intended, pass
# `svy_group` instead and regenerate the output. TODO confirm.
svy_group <- group_by(svy, dname)
summarise(svy, col.grad = survey_mean(col.grad),
api00 = survey_mean(api00, vartype = "ci"))
#> # A tibble: 1 × 5
#> col.grad col.grad_se api00 api00_low api00_upp
#> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 19.9 1.06 662. 643. 681.