# Section 3.7 — Inventory of statistical tests

This notebook contains the code examples from Section 3.7, “Inventory of statistical tests,” of the **No Bullshit Guide to Statistics**.

## Notebook setup

# load Python modules
import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Plot helper functions
from ministats import plot_pdf

# Figures setup: configure the Matplotlib/seaborn defaults for figures drawn in this notebook.
# The order matters: plt.clf() has to run before sns.set_theme (see the note on that line).
plt.clf()  # needed otherwise sns.set_theme doesn't work
# NOTE(review): this plt.clf() call appears to be what produces the empty
# "<Figure size 640x480 with 0 Axes>" cell output -- confirm.
from plot_helpers import RCPARAMS
RCPARAMS.update({'figure.figsize': (10, 3)})   # good for screen
# RCPARAMS.update({'figure.figsize': (5, 1.6)})  # good for print
# Apply the seaborn theme, passing the (updated) RCPARAMS dict as rc overrides.
sns.set_theme(
context="paper",
style="whitegrid",
palette="colorblind",
rc=RCPARAMS,
)

# Useful colors: entries 0, 1 and 4 of the currently active seaborn palette
snspal = sns.color_palette()
blue, orange, purple = snspal[0], snspal[1], snspal[4]

# IPython magic: render inline figures in the high-resolution 'retina' format
%config InlineBackend.figure_format = 'retina'

# Where to store figures (relative path; not referenced by the cells visible in this notebook)
DESTDIR = "figures/stats/inventory"

<Figure size 640x480 with 0 Axes>

# set random seed for repeatability
# (seeds NumPy's global random state; the permutation ANOVA cells further down
# re-seed it with their own values before drawing samples)
np.random.seed(42)

#######################################################


## Z-Tests

### One-sample $z$-test

See the examples/one_sample_z-test.ipynb notebook.

## Proportion tests

### Two-sample $z$-test for proportions

from statsmodels.stats.proportion import proportions_ztest


## T-tests

### One-sample $t$-test

See the examples/one_sample_t-test.ipynb notebook.

### Welch’s two-sample $t$-test

(TODO: explain the pooled-variance “two-sample $t$-test” as a special case, and why it is the inferior choice.)

## Chi-square tests

### Chi-square test for goodness of fit

#### Example: are the digits of $\pi$ random?

# Observed counts of each digit (0 through 9) among 1_000_000 digits of pi,
# obtained using   np.bincount(list(str(sympy.N(sympy.pi, 1_000_000)).replace('.','')))
pidigits = [99959,  99757, 100026, 100230, 100230, 100359,  99548,  99800, 99985, 100106]

# NOTE: the observed counts are bound to `obs`, not `os`: rebinding `os` would
# shadow the standard-library `os` module imported in the notebook setup cell
# and break any later `os.*` call.
obs = pidigits          # observed
es = [1_000_000/10]*10  # expected (uniform): 100_000 occurrences of each digit

# Chi-square goodness-of-fit test of the observed counts against the uniform
# expectation; the result (statistic and p-value) is displayed as the cell output.
from scipy.stats import chisquare
chisquare(f_obs=obs, f_exp=es)

Power_divergenceResult(statistic=5.51852, pvalue=0.7869706202650393)


## Nonparametric tests

Use these tests when the assumptions required by the other tests are not valid.

### Sign test for the population median

from scipy.stats import binomtest

# Counts of positive and negative signs, their total, and the smaller count
n_pos, n_neg = 6, 9
n_tot = n_pos + n_neg
n_min = n_neg if n_neg < n_pos else n_pos

# Two-tailed p-value from the binomial test: n_min "successes" out of n_tot
# trials under a success probability of 0.5
binomtest(n_min, n_tot, p=0.5, alternative='two-sided')

BinomTestResult(k=6, n=15, alternative='two-sided', statistic=0.4, pvalue=0.6072387695312499)

# Repeat the two-sided binomial test with the larger of the two sign counts
n_max = n_pos if n_pos > n_neg else n_neg
binomtest(n_max, n_tot, p=0.5, alternative='two-sided')

BinomTestResult(k=9, n=15, alternative='two-sided', statistic=0.6, pvalue=0.6072387695312499)


### Mann-Whitney U-test

# Load the example data set from a remote CSV file (requires network access)
dfw = pd.read_csv("https://reneshbedre.github.io/assets/posts/mann_whitney/genotype.csv")
dfw.shape  # (number of rows, number of columns) of the loaded table
# dfw  # uncomment to display the full data frame

(23, 2)

from scipy.stats import mannwhitneyu

# Two-sided Mann-Whitney U-test comparing columns "A" and "B" of dfw
mannwhitneyu(dfw["A"], dfw["B"], alternative="two-sided")

MannwhitneyuResult(statistic=489.5, pvalue=7.004695394561307e-07)


## Resampling methods

### Simulation tests

from ministats.hypothesis_tests import simulation_test

# IPython magic: display the source code of simulation_test
%psource simulation_test


### Two-sample permutation test

from ministats.hypothesis_tests import permutation_test

# IPython magic: display the source code of permutation_test
%psource permutation_test


### Permutation ANOVA

from ministats import permutation_anova

# IPython magic: display the source code of permutation_anova
%psource permutation_anova

# Build three samples of size 30 on which to test the permutation ANOVA
from scipy.stats import norm

# Fix the seed, then draw the samples in order (sample1, sample2, sample3)
# from normal distributions; only the third one has a shifted mean (0.7
# instead of 0).
np.random.seed(43)
sample1, sample2, sample3 = (
    norm(loc=mean).rvs(size=30) for mean in (0, 0, 0.7)
)

# Re-seed right before the permutation test so the displayed p-value is repeatable
# (presumably permutation_anova draws from NumPy's global random state -- TODO confirm)
np.random.seed(45)
permutation_anova([sample1, sample2, sample3])

0.0297

# compare with analytical formula:
# one-way ANOVA F-test on the same three samples; its p-value (about 0.0292)
# is close to the permutation ANOVA result shown above (0.0297)
from scipy.stats import f_oneway

f_oneway(sample1, sample2, sample3)

F_onewayResult(statistic=3.6808227678358856, pvalue=0.029206733498721497)


## Equivalence tests

See examples/two_sample_equivalence_test.ipynb for an example.