# Section 3.6 — Statistical design and error analysis#

This notebook contains the code examples from Section 3.6 Statistical design and error analysis of the No Bullshit Guide to Statistics.

## Notebook setup#

# load Python modules
import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Plot helper functions
from ministats import plot_pdf
from ministats.utils import savefigure

# Figures setup
plt.clf()  # needed otherwise sns.set_theme doesn't work
from plot_helpers import RCPARAMS
RCPARAMS.update({'figure.figsize': (10, 3)})   # good for screen
# RCPARAMS.update({'figure.figsize': (5, 1.6)})  # good for print
sns.set_theme(
context="paper",
style="whitegrid",
palette="colorblind",
rc=RCPARAMS,
)

# Useful colors
snspal = sns.color_palette()
blue, orange, purple = snspal[0], snspal[1], snspal[4]

%config InlineBackend.figure_format = 'retina'

# Where to store figures
DESTDIR = "figures/stats/design"

<Figure size 640x480 with 0 Axes>

# set random seed for repeatability
np.random.seed(42)

#######################################################


## Definitions#

# FIGURES ONLY
filename = os.path.join(DESTDIR, "H0_rejection_region.pdf")
from ministats import plot_alpha_beta_errors
with sns.axes_style("ticks"), plt.rc_context({"figure.figsize":(7,2.5)}):
ax = plot_alpha_beta_errors(cohend=0.1, ax=None, xlims=[-3,3], n=9, show_alt=False, show_concl=True, alpha_offset=(0.06,0.013))
savefigure(ax, filename)

Design params: n = 9 , alpha = 0.05 , beta = 0.910663745890349 , Delta = 0.2 , d = 0.1 , CV = 1.0965690846343148

Saved figure to figures/stats/design/H0_rejection_region.pdf
Saved figure to figures/stats/design/H0_rejection_region.png

# FIGURES ONLY
filename = os.path.join(DESTDIR, "H0_HA_distributions_cvalue.pdf")
from ministats import plot_alpha_beta_errors

with sns.axes_style("ticks"), plt.rc_context({"figure.figsize":(8,2.5)}):
ax = plot_alpha_beta_errors(cohend=0.8, show_dist_labels=True, show_concl=True,
alpha_offset=(0.06,0.013), beta_offset=(-0.06,0.02))

savefigure(ax, filename)

Design params: n = 9 , alpha = 0.05 , beta = 0.2250805806658559 , Delta = 1.6 , d = 0.8 , CV = 1.0965690846343148

Saved figure to figures/stats/design/H0_HA_distributions_cvalue.pdf

Saved figure to figures/stats/design/H0_HA_distributions_cvalue.png


## Hypothesis decision rules#

### Decision rule based on $$p$$-values#

# pre-data
alpha = ...   # chosen in advance

# post-data
obst = ...    # calculated from sample
rvT0 = ...    # sampling distribution under H0
pvalue = ...  # prob. of obst under rvT0

# make decision based on p-value
if pvalue <= alpha:
decision = "Reject H0"
else:
decision = "Fail to reject H0"


### Simplified decision rule#

# pre-data
alpha = ...     # chosen in advance
rvT0 = ...      # sampling distribution under H0
CV_alpha = ...  # calculated from alpha-quantile of rvT0

# post-data
obst = ...      # calculated from sample
# make decision based on test statistic
if obst >= CV_alpha:
decision = "Reject H0"
else:
decision = "Fail to reject H0"


## Statistical design#

# FIGURES ONLY
filename = os.path.join(DESTDIR, "panel_beta_for_different_effect_sizes.pdf")
d_small = 0.20
d_medium = 0.57 # chosen to avoid overlap between CV and Delta
d_large = 0.80
d_vlarge = 1.3

with sns.axes_style("ticks"):
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2,2, figsize=(6,3))

plot_alpha_beta_errors(cohend=d_small,  xlims=[-1.7,2.8], n=15, ax=ax1, fontsize=10, show_es=True,
alpha_offset=(-0.03,0.005), beta_offset=(0.1,0.2))
ax1.set_title("(a) Small effect size", fontsize=11)

plot_alpha_beta_errors(cohend=d_medium, xlims=[-1.6,2.9], n=15, ax=ax2, fontsize=10, show_es=True,
alpha_offset=(-0.03,0.005), beta_offset=(0.05,0.08))
ax2.set_title("(b) Medium effect size", fontsize=11)

plot_alpha_beta_errors(cohend=d_large,  xlims=[-1.5,3], n=15, ax=ax3, fontsize=10, show_es=True,
alpha_offset=(-0.03,0.005), beta_offset=(0,0.02))
ax3.set_title("(c) Large effect size", fontsize=11)

plot_alpha_beta_errors(cohend=d_vlarge,  xlims=[-1.5,3], n=15, ax=ax4, fontsize=10, show_es=True,
alpha_offset=(-0.03,0.005), beta_offset=(-0.06,0.02))
ax4.set_title("(d) Very large effect size", fontsize=11)

savefigure(fig, filename)

Design params: n = 15 , alpha = 0.05 , beta = 0.8079200023112518 , Delta = 0.4 , d = 0.2 , CV = 0.8493987605509224
Design params: n = 15 , alpha = 0.05 , beta = 0.28680362819561334 , Delta = 1.14 , d = 0.57 , CV = 0.8493987605509224

Design params: n = 15 , alpha = 0.05 , beta = 0.07303790512845224 , Delta = 1.6 , d = 0.8 , CV = 0.8493987605509224
Design params: n = 15 , alpha = 0.05 , beta = 0.0003494316033385532 , Delta = 2.6 , d = 1.3 , CV = 0.8493987605509224

Saved figure to figures/stats/design/panel_beta_for_different_effect_sizes.pdf

Saved figure to figures/stats/design/panel_beta_for_different_effect_sizes.png

# FIGURES ONLY
filename = os.path.join(DESTDIR, "panel_beta_for_different_sample_sizes.pdf")
Delta = 0.6

with sns.axes_style("ticks"):
fig, (ax1, ax2) = plt.subplots(1,2, figsize=(7,1.6))

n1 = 15
plot_alpha_beta_errors(cohend=Delta,  xlims=[-1.7,2.8], n=n1, ax=ax1, fontsize=10, show_es=True,
alpha_offset=(0.03,0.005), beta_offset=(0,0.05))
ax1.set_title(f"(a) Small sample size $n={n1}$", fontsize=12)

n2 = 30
plot_alpha_beta_errors(cohend=Delta,  xlims=[-1.5,3], n=n2, ax=ax2, fontsize=10, show_es=True,
alpha_offset=(0.005,0), beta_offset=(0.04,0.01))
ax2.set_title(f"(b) Larger sample size $n={n2}$", fontsize=12)

savefigure(fig, filename)

Design params: n = 15 , alpha = 0.05 , beta = 0.24858908649659617 , Delta = 1.2 , d = 0.6 , CV = 0.8493987605509224
Design params: n = 30 , alpha = 0.05 , beta = 0.0503487295055579 , Delta = 1.2 , d = 0.6 , CV = 0.6006156235170058

Saved figure to figures/stats/design/panel_beta_for_different_sample_sizes.pdf
Saved figure to figures/stats/design/panel_beta_for_different_sample_sizes.png

# FIGURES ONLY
filename = os.path.join(DESTDIR, "panel_beta_for_different_alphas.pdf")
Delta = 0.6

with sns.axes_style("ticks"):
fig, (ax1, ax2) = plt.subplots(1,2, figsize=(7,1.6))

n1 = 15
alpha1 = 0.05
plot_alpha_beta_errors(alpha=alpha1, cohend=Delta, xlims=[-1.7,2.8], n=15, ax=ax1, fontsize=10, show_es=True,
alpha_offset=(-0.02,0.005), beta_offset=(0.1,0.1))
ax1.set_title(r"(a) Conventional alpha $\alpha=0.05$", fontsize=12)

alpha2 = 0.1
plot_alpha_beta_errors(alpha=alpha2, cohend=Delta, xlims=[-1.7,2.8], n=15, ax=ax2, fontsize=10, show_es=True,
alpha_offset=(0,0.005), beta_offset=(0.05,0.05))
ax2.set_title(r"(b) Larger alpha $\alpha=0.1$", fontsize=12)

savefigure(fig, filename)

Design params: n = 15 , alpha = 0.05 , beta = 0.24858908649659617 , Delta = 1.2 , d = 0.6 , CV = 0.8493987605509224
Design params: n = 15 , alpha = 0.1 , beta = 0.14865057213685162 , Delta = 1.2 , d = 0.6 , CV = 0.6617903827547039

Saved figure to figures/stats/design/panel_beta_for_different_alphas.pdf
Saved figure to figures/stats/design/panel_beta_for_different_alphas.png


### Cohen’s d standardized effect size#

The effect size $$\Delta$$ we use in statistical design is usually expressed as a standardized effect size like Cohen’s $$d$$, which is defined as the observed “difference” divided by a standard deviation of the theoretical model under the null hypothesis.

#### One-sample case#

For a one-sample test of the mean relative to a null model with mean $$\mu_0$$, Cohen’s $$d$$ is calculated as:

$d = \frac{\overline{\mathbf{x}} - \mu_0}{ s_{\mathbf{x}} },$

where $$s_{\mathbf{x}}$$ is the sample standard deviation.

When doing statistical design, we haven’t obtained the sample $$\mathbf{x}$$ yet so we don’t know what its standard deviation $$s_{\mathbf{x}}$$ will be, so we have to make an educated guess of this value, which usually means using the standard deviation of the theoretical model $$\sigma_{X_0}$$.

#### Two-sample case#

For a two-sample test for the difference between means, Cohen’s $$d$$ is defined as the difference between sample means divided by the pooled standard deviation:

$d = \frac{\overline{\mathbf{x}} - \overline{\mathbf{y}} }{ s_{p} }.$

where the formula for the pooled variance is $$s_{p}^2 = [(n -1)s_{\mathbf{x}}^2 + (m -1)s_{\mathbf{y}}^2]/(n + m -2)$$.

When doing statistical design, we don’t know the standard deviations $$s_{\mathbf{x}}$$ and $$s_{\mathbf{y}}$$ so we replace them with the theoretical standard deviations $$\sigma_{X_0}$$ and $$\sigma_{Y_0}$$ under the null hypothesis.

Recall table of reference values for Cohen’s $$d$$ standardized effect sizes, suggested by Cohen in (Cohen YYYY TODO).

Cohen’s d

Effect size

0.01

very small

0.20

small

0.50

medium

0.80

large

The formulas for sample size planning and power calculations we’ll present below are based on Cohen’s $$d$$.

This means you’ll have to express your guess about the effect size $$\Delta$$ as $$d = \frac{\Delta}{\sigma_0}$$ where $$\sigma_0$$ is your best about the standard deviation of the theoretical distribution.

Note Cohen’s $$d$$ is based on the standard deviations of the “raw” theoretical population, and not standard error.

## Example 1: detect kombuncha volume deviation from theory#

Design Type A: choose $$\alpha=0.05$$, $$\beta=0.2$$, $$\Delta_{\textrm{min}} = 4\;\text{ml}$$ then calculate required sample size $$n$$. We’ll assume $$\sigma=10$$.

alpha = 0.05
beta = 0.2
Delta_min = 4

# assumption
sigma = 10

from scipy.stats import norm
rvZ = norm(loc=0, scale=1)

z_u = rvZ.ppf(1-alpha)
z_l = rvZ.ppf(beta)

n_approx = (z_u - z_l)**2 * sigma**2 / Delta_min**2
n_approx

38.64098270012354


### Using statsmodels#

To use the statsmodels sample size estimation function, we’ll need to express the effect size $$\Delta=4$$ in terms of Cohen’s $$d$$.

d = Delta_min / sigma
d

0.4

from statsmodels.stats.power import TTestPower
ttp = TTestPower()
n = ttp.solve_power(effect_size=d, alpha=0.05, power=0.8,
alternative="larger")
n

40.02907682056493

filename = os.path.join(DESTDIR, "plot_one_sample_t_power_vs_sample_size.pdf")
ds = np.array([0.3, 0.4, 0.5])
ns = np.arange(5, 101)
fig, ax = plt.subplots(figsize=(6,2.4))
ttp.plot_power(dep_var="nobs", ax=ax,
effect_size=ds, nobs=ns, alpha=0.05,
alternative="larger")
ax.set_xticks( np.arange(5,105,5) )
# ax.set_title("Power of t-test vs. sample size for different effect sizes")
ax.set_title(None)
savefigure(fig, filename)

Saved figure to figures/stats/design/plot_one_sample_t_power_vs_sample_size.pdf

Saved figure to figures/stats/design/plot_one_sample_t_power_vs_sample_size.png

filename = os.path.join(DESTDIR, "plot_one_sample_t_power_vs_effect_size.pdf")
ds = np.arange(0, 2, 0.01)
ns = np.array([5, 10, 50, 100])
fig, ax = plt.subplots(figsize=(6,2.4))
ttp.plot_power(dep_var="effect size", ax=ax,
effect_size=ds, nobs=ns, alpha=0.05,
alternative="larger")
ax.set_xticks( np.arange(0, 2+0.1, 0.1) )
# ax.set_title("Power of t-test vs. effect size for different sample sizes")
ax.set_title(None)
savefigure(fig, filename)

Saved figure to figures/stats/design/plot_one_sample_t_power_vs_effect_size.pdf

Saved figure to figures/stats/design/plot_one_sample_t_power_vs_effect_size.png

#######################################################
batch55pop = kombuchapop[kombuchapop["batch"]==55]
kpop55 = batch55pop["volume"]

np.random.seed(42)
n = 41  # rounding up
ksample55 = kpop55.sample(n)
len(ksample55)

41

# ksample55.values

from ministats import ttest_mean

ttest_mean(ksample55, mu0=1000)

0.43686918358182525

# ALT.
# from ministats import simulation_test_mean
# simulation_test_mean(ksample55, mu0=1000, sigma0=10)


## Example 2: comparison of East vs. West electricity prices#

#######################################################
eprices.groupby("loc").describe()

price
count mean std min 25% 50% 75% max
loc
East 9.0 6.155556 0.877655 4.8 5.5 6.3 6.5 7.7
West 9.0 9.155556 1.562139 6.8 8.3 8.6 10.0 11.8

Need to calculate Cohen’s $$d$$ that corresponds to the assumption $$\Delta_{\text{min}} = 1$$ and assuming the standard deviation of the population is $$\sigma=1$$.

Delta_min = 1
std_guess = 1

d_min = Delta_min / std_guess


### Solving for a desired power#

The function solve_power takes as argument the chosen level of power, and two of the three other design parameters alpha, nobs1, and effect_size, and calculates the value of the third parameter required to achieve the chosen level of power $$=(1-\beta)$$.

from statsmodels.stats.power import TTestIndPower
ttindp = TTestIndPower()

# power of two-sample t-test assuming d_min and n=m=9
ttindp.power(effect_size=d_min, nobs1=9,
alpha=0.05, alternative="larger")

0.6500703467709203

filename = os.path.join(DESTDIR, "plot_two_sample_t_power_vs_sample_size.pdf")
ds = np.array([0.5, 0.8, 1.2])
ns = np.arange(2, 71)
fig, ax = plt.subplots(figsize=(6,2.4))
ttindp.plot_power(dep_var="nobs", ax=ax,
effect_size=ds, nobs=ns, alpha=0.05,
alternative="larger")
ax.set_xticks( np.arange(5,75,5) )
# ax.set_title("Power of two-sample t-test vs. sample size $n=m$ for different effect sizes")
ax.set_title(None)
savefigure(fig, filename)

Saved figure to figures/stats/design/plot_two_sample_t_power_vs_sample_size.pdf

Saved figure to figures/stats/design/plot_two_sample_t_power_vs_sample_size.png

filename = os.path.join(DESTDIR, "plot_two_sample_t_power_vs_effect_size.pdf")
ds = np.arange(0, 2, 0.01)
ns = np.array([5, 10, 50, 100])
fig, ax = plt.subplots(figsize=(6,2.4))
ttindp.plot_power(dep_var="effect size", ax=ax,
effect_size=ds, nobs=ns, alpha=0.05,
alternative="larger")
ax.set_xticks( np.arange(0, 2+0.1, 0.1) )
# ax.set_title("Power of two-smaple t-test vs. effect size for different sample sizes")
ax.set_title(None)
savefigure(fig, filename)

Saved figure to figures/stats/design/plot_two_sample_t_power_vs_effect_size.pdf

Saved figure to figures/stats/design/plot_two_sample_t_power_vs_effect_size.png


### Calculate minimum effect size#

# minimum effect size to achieve 80% power when n=m=9
ttindp.solve_power(alpha=0.05, power=0.8, nobs1=9, alternative="larger")

1.2250749306456195


### (optional) Calculate sample size needed to achieve 0.8 power#

# minimum sample size required to achieve 80% power when effect size is d=1
ttindp.solve_power(effect_size=1.0, alpha=0.05, power=0.8, alternative="larger")
#######################################################

13.097761955067904


## Alternative calculation methods#

### Using pingouin#

import pingouin as pg


#### One-sample $$t$$-test#

Delta_min = 0.4
pg.power_ttest(d=Delta_min, alpha=0.05, power=0.8,
contrast="one-sample", alternative="greater")

40.02907615780519

# power of one-sample t-test assuming d=0.4 and n=41
pg.power_ttest(d=0.4, alpha=0.05, n=41,
contrast="one-sample", alternative="greater")

0.8085822366975572


#### Two-sample $$t$$-test#

# power of two-sample t-test assuming d_min and n=m=9
pg.power_ttest(d=d_min, alpha=0.05, n=9,
contrast="two-samples", alternative="greater")

0.65007034847832

# minimum effect size to achieve 80% when n=m=9
pg.power_ttest(alpha=0.05, power=0.8, n=9,
contrast="two-samples", alternative="greater")

1.2250749193350323

# (optional) sample size required to achieve d_min at 80% power
pg.power_ttest(d=d_min, alpha=0.05, power=0.8,
contrast="two-samples", alternative="greater")

13.097761619826874


## Explanations#

### One-sided and two-sided rejection regions#

from scipy.stats import t as tdist
rvT0 = tdist(df=9)

alpha = 0.05

# right-tailed rejection region
rvT0.ppf(alpha)

-1.8331129326536337

# left-tailed rejection region
rvT0.ppf(1-alpha)

1.8331129326536335

# two-sided rejection region
rvT0.ppf(alpha/2), rvT0.ppf(1 - alpha/2)

(-2.262157162740992, 2.2621571627409915)

filename = os.path.join(DESTDIR, "panel_rejection_regions_left_twotailed_right.pdf")

from scipy.stats import t as tdist
rvT = tdist(df=9)

xs = np.linspace(-4, 4, 1000)
ys = rvT.pdf(xs)

# design choices
transp = 0.3
alpha_color = "#4A25FF"
beta_color = "#0CB0D6"
axis_color = "#808080"

def plot_rejection_region(ax, xs, ys, alt, title):
ax.set_title(title, fontsize=11)

# manually add arrowhead to x-axis + label t at the end
ax.plot(1, 0, ">", color=axis_color, transform=ax.get_yaxis_transform(), clip_on=False)
ax.set_xlabel("t")
ax.xaxis.set_label_coords(1, 0.2)

sns.lineplot(x=xs, y=ys, ax=ax, color="k")
ax.set_xlim(-4, 4)
ax.set_ylim(0, 0.42)
ax.set_xticklabels([])
ax.set_yticks([])
ax.spines[['left', 'right', 'top']].set_visible(False)
ax.spines['bottom'].set_color(axis_color)
ax.tick_params(axis='x', colors=axis_color)
ax.xaxis.label.set_color(axis_color)

if alt == "greater":
ax.set_xticks([2])
# highlight the right tail
ax.vlines([2], ymin=0, ymax=rvT.pdf(2), linestyle="-", alpha=transp+0.2, color=alpha_color)
ax.text(2, -0.03, r"$\mathrm{CV}_{\alpha}^+$", verticalalignment="top", horizontalalignment="center")

elif alt == "less":
ax.set_xticks([-2])
# highlight the left tail
ax.vlines([-2], ymin=0, ymax=rvT.pdf(-2), linestyle="-", alpha=transp+0.2, color=alpha_color)
ax.text(-2, -0.03, r"$\mathrm{CV}_{\alpha}^-$", verticalalignment="top", horizontalalignment="center")

elif alt == "two-sided":
ax.set_xticks([-2,2])
# highlight the left and right tails
ax.vlines([-2], ymin=0, ymax=rvT.pdf(-2), linestyle="-", alpha=transp+0.2, color=alpha_color)
ax.text(-2, -0.03, r"$\mathrm{CV}_{\alpha/2}^-$", verticalalignment="top", horizontalalignment="center")
ax.vlines([2], ymin=0, ymax=rvT.pdf(2), linestyle="-", alpha=transp+0.2, color=alpha_color)
ax.text(2, -0.03, r"$\mathrm{CV}_{\alpha/2}^+$", verticalalignment="top", horizontalalignment="center")

with sns.axes_style("ticks"), plt.rc_context({"figure.figsize":(7,1.6)}):
fig, (ax3, ax1, ax2) = plt.subplots(1,3)

# RIGHT
title = '(a) right-tailed rejection region'
plot_rejection_region(ax3, xs, ys, "greater", title)

# LEFT
title = '(b) left-tailed rejection region'
plot_rejection_region(ax1, xs, ys, "less", title)

# TWO-TAILED
title = '(c) two-tailed rejection region'
plot_rejection_region(ax2, xs, ys, "two-sided", title)

savefigure(fig, filename)

Saved figure to figures/stats/design/panel_rejection_regions_left_twotailed_right.pdf
Saved figure to figures/stats/design/panel_rejection_regions_left_twotailed_right.png