Select a proportionate stratified random sample, where stratification is based on sites and gender

59 Views Asked by At

I have three IDBs, and this is the number of people registered at each site:

Site     female  Male  Total
IDB_A     46      14    60
IDB_B     17      23    40
IDB_C     79      21    100
Total     142     58    200

And this is the sample I want to select from each site

Site     female  Male  Total
IDB_A     20      6     26
IDB_B     7       10    17
IDB_C     34      9    43
Total     60     25    85

I used the following code, which creates three strata (one for each site) and then selects a random sample from each stratum:

# Split the sampling frame into one stratum per site.
str1 <- FBF_PDM[FBF_PDM$Sites == "IDB_A", ]
str2 <- FBF_PDM[FBF_PDM$Sites == "IDB_B", ]
str3 <- FBF_PDM[FBF_PDM$Sites == "IDB_C", ]

# Draw a simple random sample (without replacement) of the planned size from
# each stratum. seq_len() is used instead of 1:nrow() because 1:0 would
# yield c(1, 0) for an empty stratum.
sample1 <- str1[sample(seq_len(nrow(str1)), 26, replace = FALSE), ]
sample2 <- str2[sample(seq_len(nrow(str2)), 17, replace = FALSE), ]
sample3 <- str3[sample(seq_len(nrow(str3)), 43, replace = FALSE), ]

# Stack the three site samples into one data frame.
overall <- rbind(sample1, sample2, sample3)

# Export the combined sample as a comma-separated file without row names.
write.table(overall, "overall2.csv", row.names = FALSE, sep = ",")

However, I am struggling to specify how many males and females should be selected from each site.

1

There is 1 best solution below

2
On

Using Map, you can apply a subsetting sample call to each stratum of your desired plan: split the data set by site and pass the planned sample sizes for each sex alongside it.

## Sampling plan (constructed in the "Data" section): one row per site plus a
## "Total" row; `female` and `Male` give the number of rows to draw per sex.
plan
#    Site female Male Total
# 1 IDB_A     20    6    26
# 2 IDB_B      7   10    17
# 3 IDB_C     34    9    43
# 4 Total     60   25    85


set.seed(42)  ## you should set a seed here to make it reproducible

## One stratum (data frame) per site; split() names the pieces by site.
strata <- split(dat, dat$site)

## Look up each stratum's quota by site name instead of by row position, so
## the pairing does not depend on the row order of `plan` or on the "Total"
## row happening to be row 4.
quota_row <- match(names(strata), plan$Site)
stopifnot("every site in `dat` needs a row in `plan`" = !anyNA(quota_row))
quota <- plan[quota_row, ]

## For each site, draw `n_female` rows with female == 1 and `n_male` rows with
## female == 0 (without replacement) and stack them, females first.
## which() keeps NA indicators from producing all-NA rows, and sample.int()
## avoids sample()'s special handling of a length-one first argument.
samp <- Map(\(x, n_female, n_male) {
  women <- x[which(x$female == 1), ]
  men <- x[which(x$female == 0), ]
  rbind(
    women[sample.int(nrow(women), n_female), ],
    men[sample.int(nrow(men), n_male), ]
  )
}, strata, quota$female, quota$Male)

## Verify the draw: tabulate the `female` indicator within each site's sample
lapply(samp, \(site_sample) table(female = site_sample$female))
# $IDB_A
# female
# 0  1 
# 6 20 
# 
# $IDB_B
# female
# 0  1 
# 10  7 
# 
# $IDB_C
# female
# 0  1 
# 9 34 

Data:

## Example data for the answer: a 200-row data frame (row.names = c(NA, -200L))
## with three columns:
##   site   - character site label ("IDB_A", "IDB_B" or "IDB_C")
##   female - numeric indicator coded 1 or 0; the sampling code pairs
##            female == 1 with the plan's `female` quota and female == 0
##            with its `Male` quota
##   X      - an additional numeric column; it is not used by the sampling
##            code (presumably a placeholder variable)
dat <- structure(list(site = c("IDB_A", "IDB_A", "IDB_A", "IDB_A", "IDB_A", 
"IDB_A", "IDB_A", "IDB_A", "IDB_A", "IDB_A", "IDB_A", "IDB_A", 
"IDB_A", "IDB_A", "IDB_A", "IDB_A", "IDB_A", "IDB_A", "IDB_A", 
"IDB_A", "IDB_A", "IDB_A", "IDB_A", "IDB_A", "IDB_A", "IDB_A", 
"IDB_A", "IDB_A", "IDB_A", "IDB_A", "IDB_A", "IDB_A", "IDB_A", 
"IDB_A", "IDB_A", "IDB_A", "IDB_A", "IDB_A", "IDB_A", "IDB_A", 
"IDB_A", "IDB_A", "IDB_A", "IDB_A", "IDB_A", "IDB_A", "IDB_A", 
"IDB_A", "IDB_A", "IDB_A", "IDB_A", "IDB_A", "IDB_A", "IDB_A", 
"IDB_A", "IDB_A", "IDB_A", "IDB_A", "IDB_A", "IDB_A", "IDB_B", 
"IDB_B", "IDB_B", "IDB_B", "IDB_B", "IDB_B", "IDB_B", "IDB_B", 
"IDB_B", "IDB_B", "IDB_B", "IDB_B", "IDB_B", "IDB_B", "IDB_B", 
"IDB_B", "IDB_B", "IDB_B", "IDB_B", "IDB_B", "IDB_B", "IDB_B", 
"IDB_B", "IDB_B", "IDB_B", "IDB_B", "IDB_B", "IDB_B", "IDB_B", 
"IDB_B", "IDB_B", "IDB_B", "IDB_B", "IDB_B", "IDB_B", "IDB_B", 
"IDB_B", "IDB_B", "IDB_B", "IDB_B", "IDB_C", "IDB_C", "IDB_C", 
"IDB_C", "IDB_C", "IDB_C", "IDB_C", "IDB_C", "IDB_C", "IDB_C", 
"IDB_C", "IDB_C", "IDB_C", "IDB_C", "IDB_C", "IDB_C", "IDB_C", 
"IDB_C", "IDB_C", "IDB_C", "IDB_C", "IDB_C", "IDB_C", "IDB_C", 
"IDB_C", "IDB_C", "IDB_C", "IDB_C", "IDB_C", "IDB_C", "IDB_C", 
"IDB_C", "IDB_C", "IDB_C", "IDB_C", "IDB_C", "IDB_C", "IDB_C", 
"IDB_C", "IDB_C", "IDB_C", "IDB_C", "IDB_C", "IDB_C", "IDB_C", 
"IDB_C", "IDB_C", "IDB_C", "IDB_C", "IDB_C", "IDB_C", "IDB_C", 
"IDB_C", "IDB_C", "IDB_C", "IDB_C", "IDB_C", "IDB_C", "IDB_C", 
"IDB_C", "IDB_C", "IDB_C", "IDB_C", "IDB_C", "IDB_C", "IDB_C", 
"IDB_C", "IDB_C", "IDB_C", "IDB_C", "IDB_C", "IDB_C", "IDB_C", 
"IDB_C", "IDB_C", "IDB_C", "IDB_C", "IDB_C", "IDB_C", "IDB_C", 
"IDB_C", "IDB_C", "IDB_C", "IDB_C", "IDB_C", "IDB_C", "IDB_C", 
"IDB_C", "IDB_C", "IDB_C", "IDB_C", "IDB_C", "IDB_C", "IDB_C", 
"IDB_C", "IDB_C", "IDB_C", "IDB_C", "IDB_C", "IDB_C"), female = c(1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0), X = c(-1.80440994496324, -1.12182260635522, 
-0.34792960655914, 1.23890214863198, -0.274197564852279, 0.162380715558193, 
-0.0646069097530627, -0.705237097025142, 1.36219726875099, -1.09651318808332, 
-0.228433518546891, -0.347828080610269, 0.532128619506733, 1.60723456226119, 
0.513814526116594, 1.38237316057303, 0.763097048549903, -0.624584544634111, 
0.0815438004289869, 1.37607911136477, -1.56126578615704, 0.324770660594699, 
-0.156790316952238, 0.877783860734352, 0.750166575180638, 0.30105340384283, 
1.49281111722931, -1.5254938001697, 0.910717558486022, -1.57953202785754, 
0.587716257118581, 0.0896422960007244, 0.96726165694984, 0.0788123529148234, 
-1.56869983648609, -2.00782318401609, 0.540973524238763, -0.0733765981084255, 
-0.571018393558081, -0.311068459853641, -0.671415067101365, -0.157340452080979, 
-0.931305070999869, -1.98300947947013, -0.219599804249378, 1.04527586946239, 
1.87732956629967, 0.00260619595716637, -0.0806699347091655, 0.96298228123987, 
0.0535710165465174, -0.434898408910037, -1.73729725534231, -1.26369559978243, 
0.406308512069273, -1.45965396777976, 1.0484573701547, -1.3464305394502, 
-0.193570558113494, -0.00233595711367605, -0.0128297338671881, 
0.151947321984398, 0.598511131401079, -0.126212437298279, -0.248535565846776, 
0.160327394904622, -0.433641941597269, 1.53741241910027, -2.17024657661608, 
1.02700461929143, -0.248482933189933, 0.422320385832393, 0.987653293639674, 
0.835568172035023, -0.660521859045705, 1.56406949329184, -1.62297593529155, 
0.863896373478988, -0.5116027734362, -1.9173650254454, -1.86581384861995, 
0.245179063439689, 2.22353430190807, 0.273376064144465, 1.13078479452828, 
0.838669374060099, -0.654615277878206, 0.953960978479396, 0.352951465489323, 
0.206599292649613, 1.00111323431025, 0.747451991640276, -0.626574836723139, 
0.395223685936714, -0.892167954045577, 0.630818410091773, -0.432705183284567, 
0.452138644218517, 0.367999045196345, -0.270387489661909, 0.465512617712979, 
0.574356225626969, -0.230699990157411, 1.17224250830245, 1.39270270673312, 
-0.661991255709585, -0.777369206725014, 0.51353857890878, -0.913312238041272, 
-0.449423805250301, 0.802932699103618, -0.573476850981104, -1.92812516784833, 
0.664390833822148, -1.60254022379425, -1.35460025745385, -3.0179326794179, 
0.831237822230063, 0.251097088847344, 0.462293465941329, 0.844792223223237, 
-0.0419715243215621, -1.10557590570003, 0.563775652330874, 1.30336475581967, 
-1.50022093779466, -0.606989235119036, -0.292245065656577, -1.2896833433236, 
0.694105848774699, -0.599181902897119, 1.25690664397164, 0.053508013627363, 
0.728092517671852, 1.56109806821061, 0.26562475359719, 1.07672625920727, 
0.210697931679245, -1.51167352582237, 0.022402259913625, 0.718136206042103, 
0.48945701784227, -0.17388834587827, -1.2176994886684, 0.646398368709462, 
-0.91645602834789, -1.25182344242557, 0.594927718405501, -1.23281066518983, 
0.244364414906138, 0.00277218534359506, -1.32820968602556, 1.17969641183277, 
-0.592804999148929, 1.19997831633667, -0.475033679612135, -0.575057096150816, 
-0.031226024608499, -0.358056995978456, -0.356601077647731, -0.877664383433726, 
-1.21289712517796, 0.613286618687034, -0.806203341462477, -1.3764569299089, 
-0.507847899163547, -0.800935486737338, -2.19278568567007, -0.290937149267675, 
0.167174121160518, 0.294692356485688, 0.392741265420726, -1.00084371300082, 
-0.325727119823434, -1.00834880542884, -0.635431482033766, -1.20984068798734, 
-1.11646380065269, 0.629881162575935, -0.272521570178626, -0.258841168378671, 
1.72955817989149, -0.0583921654073717, -0.537063784608919, 0.74728669616574, 
-0.487257835389733, 1.3729077814795, -0.377672360646221, -0.616152614118394, 
-1.16812505107469, 0.328640358591086, 1.46651062744016, -0.356009545088913, 
0.261467641877431, 0.333328855175731, 1.42219324030954, 0.663876600666173, 
-1.07365515569531, -0.69690177466674, -0.746130456934561)), row.names = c(NA, 
-200L), class = "data.frame")

## Sampling plan: number of rows to draw per site and sex. The last row holds
## the totals as stated in the question; note that the three site rows add up
## to 61 females and 86 overall, not the stated 60 and 85.
plan <- data.frame(
  Site = c("IDB_A", "IDB_B", "IDB_C", "Total"),
  female = c(20L, 7L, 34L, 60L),
  Male = c(6L, 10L, 9L, 25L),
  Total = c(26L, 17L, 43L, 85L)
)