Sampling

A short description of the post.

tactile_prop_red
# A tibble: 33 x 4
   group            replicate red_balls prop_red
   <chr>                <int>     <int>    <dbl>
 1 Ilyas, Yohan             1        21     0.42
 2 Morgan, Terrance         2        17     0.34
 3 Martin, Thomas           3        21     0.42
 4 Clark, Frank             4        21     0.42
 5 Riddhi, Karina           5        18     0.36
 6 Andrew, Tyler            6        19     0.38
 7 Julia                    7        19     0.38
 8 Rachel, Lauren           8        11     0.22
 9 Daniel, Caroline         9        15     0.3 
10 Josh, Maeve             10        17     0.34
# ... with 23 more rows
# Histogram of the 33 hand-collected (tactile) sample proportions.
# Bins are 0.05 wide and aligned so that one bin edge falls exactly on 0.4;
# white outlines keep neighbouring bars visually distinct.
ggplot(data = tactile_prop_red, mapping = aes(x = prop_red)) +
  geom_histogram(binwidth = 0.05, boundary = 0.4, color = "white") +
  xlab("Proportion of 50 balls that were red") +
  ggtitle("Distribution of 33 proportions red")

#Why was it important to mix the bowl before we sampled the balls? The bowl holds a large population of balls, and counting every individual one would be impractical, so we estimate the proportion of red balls from a sample instead. Mixing the bowl first gives every ball an equal chance of being selected, which ensures that we get a random sample out of the population.

#Why is it that our 33 groups of friends did not all have the same number of balls that were red out of 50, and hence different proportions red? We created an estimate of the population proportion by sampling 50 balls 33 times. After each sample, we returned the balls to the bowl to make sure the next sample was also unbiased and random. Because each sample is drawn at random, we get a different number of red balls in each sample taken, hence the differences in the proportions red.

bowl
# A tibble: 2,400 x 2
   ball_ID color
     <int> <chr>
 1       1 white
 2       2 white
 3       3 white
 4       4 red  
 5       5 white
 6       6 white
 7       7 red  
 8       8 white
 9       9 red  
10      10 white
# ... with 2,390 more rows
# Segment 1: sample size = 28 ------------------------------
# 1.a) Draw 1150 virtual samples of 28 balls each from the bowl
virtual_samples_28 <- rep_sample_n(bowl, size = 28, reps = 1150)

# 1.b) For every replicate, count the red balls and turn that count
#      into the proportion of the 28 sampled balls that were red
virtual_prop_red_28 <- virtual_samples_28 %>%
  group_by(replicate) %>%
  summarize(
    red = sum(color == "red"),
    prop_red = red / 28
  )

# 1.c) Show the sampling distribution of the 1150 proportions as a
#      histogram (bins 0.05 wide, with a bin edge placed at 0.4)
ggplot(data = virtual_prop_red_28, mapping = aes(x = prop_red)) +
  geom_histogram(binwidth = 0.05, boundary = 0.4, color = "white") +
  xlab("Proportion of 28 balls that were red") +
  ggtitle("28")
# Segment 2: sample size = 53 ------------------------------
# 2.a) Draw 1150 virtual samples of 53 balls each from the bowl
virtual_samples_53 <- rep_sample_n(bowl, size = 53, reps = 1150)

# 2.b) For every replicate, count the red balls and turn that count
#      into the proportion of the 53 sampled balls that were red
virtual_prop_red_53 <- virtual_samples_53 %>%
  group_by(replicate) %>%
  summarize(
    red = sum(color == "red"),
    prop_red = red / 53
  )

# 2.c) Show the sampling distribution of the 1150 proportions as a
#      histogram (bins 0.05 wide, with a bin edge placed at 0.4)
ggplot(data = virtual_prop_red_53, mapping = aes(x = prop_red)) +
  geom_histogram(binwidth = 0.05, boundary = 0.4, color = "white") +
  xlab("Proportion of 53 balls that were red") +
  ggtitle("53")
# Segment 3: sample size = 118 ------------------------------
# 3.a) Draw 1150 virtual samples of 118 balls each from the bowl
virtual_samples_118 <- rep_sample_n(bowl, size = 118, reps = 1150)

# 3.b) For every replicate, count the red balls and turn that count
#      into the proportion of the 118 sampled balls that were red
virtual_prop_red_118 <- virtual_samples_118 %>%
  group_by(replicate) %>%
  summarize(
    red = sum(color == "red"),
    prop_red = red / 118
  )

# 3.c) Show the sampling distribution of the 1150 proportions as a
#      histogram (bins 0.05 wide, with a bin edge placed at 0.4)
ggplot(data = virtual_prop_red_118, mapping = aes(x = prop_red)) +
  geom_histogram(binwidth = 0.05, boundary = 0.4, color = "white") +
  xlab("Proportion of 118 balls that were red") +
  ggtitle("118")


<div class="layout-chunk" data-layout="l-body">
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class='co'># n = 28</span>
<span class='va'>virtual_prop_red_28</span> <span class='op'><a href='moderndive.github.io/moderndive//reference/pipe.html'>%&gt;%</a></span> 
  <span class='fu'><a href='https://dplyr.tidyverse.org/reference/summarise.html'>summarize</a></span><span class='op'>(</span>sd <span class='op'>=</span> <span class='fu'><a href='https://rdrr.io/r/stats/sd.html'>sd</a></span><span class='op'>(</span><span class='va'>prop_red</span><span class='op'>)</span><span class='op'>)</span>
</code></pre></div>

A tibble: 1 x 1

  sd

1 0.0911


<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class='co'># n = 53</span>
<span class='va'>virtual_prop_red_53</span> <span class='op'><a href='moderndive.github.io/moderndive//reference/pipe.html'>%&gt;%</a></span> 
  <span class='fu'><a href='https://dplyr.tidyverse.org/reference/summarise.html'>summarize</a></span><span class='op'>(</span>sd <span class='op'>=</span> <span class='fu'><a href='https://rdrr.io/r/stats/sd.html'>sd</a></span><span class='op'>(</span><span class='va'>prop_red</span><span class='op'>)</span><span class='op'>)</span>
</code></pre></div>

A tibble: 1 x 1

  sd

1 0.0641


<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class='co'># n = 118</span>
<span class='va'>virtual_prop_red_118</span> <span class='op'><a href='moderndive.github.io/moderndive//reference/pipe.html'>%&gt;%</a></span> 
  <span class='fu'><a href='https://dplyr.tidyverse.org/reference/summarise.html'>summarize</a></span><span class='op'>(</span>sd <span class='op'>=</span> <span class='fu'><a href='https://rdrr.io/r/stats/sd.html'>sd</a></span><span class='op'>(</span><span class='va'>prop_red</span><span class='op'>)</span><span class='op'>)</span>
</code></pre></div>

A tibble: 1 x 1

  sd

1 0.0444


</div>


<div class="layout-chunk" data-layout="l-body">
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class='va'>bowl</span>
</code></pre></div>

A tibble: 2,400 x 2

```
   ball_ID color
 1       1 white
 2       2 white
 3       3 white
 4       4 red
 5       5 white
 6       6 white
 7       7 red
 8       8 white
 9       9 red
10      10 white
# … with 2,390 more rows
```


</div>


<div class="layout-chunk" data-layout="l-body">
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class='va'>bowl</span> <span class='op'><a href='moderndive.github.io/moderndive//reference/pipe.html'>%&gt;%</a></span> 
  <span class='fu'><a href='https://dplyr.tidyverse.org/reference/summarise.html'>summarize</a></span><span class='op'>(</span>red <span class='op'>=</span> <span class='fu'><a href='https://rdrr.io/r/base/sum.html'>sum</a></span><span class='op'>(</span><span class='va'>color</span> <span class='op'>==</span> <span class='st'>"red"</span><span class='op'>)</span><span class='op'>)</span> 
</code></pre></div>

A tibble: 1 x 1

red

1 900


</div>


<div class="layout-chunk" data-layout="l-body">
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class='va'>virtual_shovel</span> <span class='op'>&lt;-</span> <span class='va'>bowl</span> <span class='op'><a href='moderndive.github.io/moderndive//reference/pipe.html'>%&gt;%</a></span> 
  <span class='fu'><a href='https://infer.tidymodels.org/reference/rep_sample_n.html'>rep_sample_n</a></span><span class='op'>(</span>size <span class='op'>=</span> <span class='fl'>50</span><span class='op'>)</span>
<span class='va'>virtual_shovel</span>
</code></pre></div>

A tibble: 50 x 3

Groups: replicate [1]

```
   replicate ball_ID color
 1         1    1508 white
 2         1    1689 red
 3         1    2162 red
 4         1     694 red
 5         1    1056 white
 6         1    1891 white
 7         1     855 red
 8         1    1584 white
 9         1    1537 red
10         1    1173 white
# … with 40 more rows
```


</div>


<div class="layout-chunk" data-layout="l-body">
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class='va'>virtual_shovel</span> <span class='op'><a href='moderndive.github.io/moderndive//reference/pipe.html'>%&gt;%</a></span> 
  <span class='fu'><a href='https://dplyr.tidyverse.org/reference/summarise.html'>summarize</a></span><span class='op'>(</span>num_red <span class='op'>=</span> <span class='fu'><a href='https://rdrr.io/r/base/sum.html'>sum</a></span><span class='op'>(</span><span class='va'>color</span> <span class='op'>==</span> <span class='st'>"red"</span><span class='op'>)</span><span class='op'>)</span> <span class='op'><a href='moderndive.github.io/moderndive//reference/pipe.html'>%&gt;%</a></span> 
  <span class='fu'><a href='https://dplyr.tidyverse.org/reference/mutate.html'>mutate</a></span><span class='op'>(</span>prop_red <span class='op'>=</span> <span class='va'>num_red</span> <span class='op'>/</span> <span class='fl'>50</span><span class='op'>)</span>
</code></pre></div>

A tibble: 1 x 3

```
  replicate num_red prop_red
1         1      20      0.4
```


</div>

















```{.r .distill-force-highlighting-css}