Section 5.4 — Bayesian difference between means#

This notebook contains the code examples from Section 5.4 Bayesian difference between means from the No Bullshit Guide to Statistics.


Notebook setup#

# load Python modules
import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import bambi as bmb
import arviz as az
# Figures setup
plt.clf()  # needed otherwise `sns.set_theme` doesn't work
from plot_helpers import RCPARAMS
RCPARAMS.update({"figure.figsize": (5, 3)})   # good for screen
# RCPARAMS.update({"figure.figsize": (5, 1.6)})  # good for print
sns.set_theme(
    context="paper",
    style="whitegrid",
    palette="colorblind",
    rc=RCPARAMS,
)

# High-resolution please
%config InlineBackend.figure_format = "retina"
# set random seed for repeatability
np.random.seed(42)
#######################################################

Introduction#

We’ll now revisit the problem of comparison between two groups from a Bayesian perspective.

Model#

The data models for the two groups are based on Student’s \(t\)-distribution:

\[ X \sim \mathcal{T}(\nu, M_X, \Sigma_X) \qquad \text{and} \qquad Y \sim \mathcal{T}(\nu, M_Y, \Sigma_Y). \]

We’ll use a normal prior for the means, half-\(t\) priors for standard deviations, and a gamma distribution for the degrees of freedom parameter:

\[ M_X, M_Y \sim \mathcal{N}(\mu_{M}, \sigma_{M}), \quad \Sigma_X, \Sigma_Y \sim \mathcal{T}^+\!(\nu_{\Sigma}, \sigma_{\Sigma}), \quad \nu \sim \textrm{Gamma}(\alpha,\beta). \]

Bambi formula objects#

import bambi as bmb
formula = bmb.Formula("y ~ 0 + group",
                      "sigma ~ 0 + group")
formula
Formula('y ~ 0 + group', 'sigma ~ 0 + group')
formula.main, formula.additionals
('y ~ 0 + group', ('sigma ~ 0 + group',))
formula.get_all_formulas()
['y ~ 0 + group', 'sigma ~ 0 + group']
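
The 0 + group syntax removes the intercept, so the design matrix contains one indicator column per group and each coefficient corresponds directly to the mean of its group. A minimal illustration of this cell-means encoding using pandas (pd.get_dummies is used only for illustration; it is not part of Bambi's API):

# Cell-means encoding: one indicator column per group and no intercept column
demo = pd.DataFrame({"group": ["A", "A", "B", "B"]})
pd.get_dummies(demo["group"], prefix="group", dtype=int)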

Choosing priors for log-sigma#

Normal distribution for log-sigma#

from scipy.stats import norm

def get_lognormal(mu=0, sigma=1):
    logsigs = np.linspace(-5, 5, 1000)
    dlogsigs = norm(mu, sigma).pdf(logsigs)
    sigmas = np.exp(logsigs)
    # Apply the change of variables to get the density for sigma
    # based on the Jacobian |d(log(sigma))/d(sigma)| = 1/sigma
    dsigmas = dlogsigs / sigmas
    return sigmas, dsigmas


with plt.rc_context({"figure.figsize":(6,2.2)}):
    fig, (ax1, ax2) = plt.subplots(1,2, sharey=True)

    # PLOT 1
    mu, sigma = 0, 1
    sigmas, dsigmas = get_lognormal(mu, sigma)
    sns.lineplot(x=sigmas, y=dsigmas, ax=ax1)
    ax1.set_xlabel('$\\sigma$')
    ax1.set_ylabel('density')
    ax1.set_title(f'(a) LogNorm({mu},{sigma}) = exp($\\mathcal{{N}}$({mu},{sigma}))')
    ax1.set_xlim(0, 10)
    ax1.set_ylim(0, 0.7)

    # PLOT 2
    mu, sigma = 1, 2
    sigmas, dsigmas = get_lognormal(mu, sigma)
    sns.lineplot(x=sigmas, y=dsigmas, ax=ax2)
    ax2.set_xlabel('$\\sigma$')
    ax2.set_ylabel('density')
    ax2.set_title(f'(b) LogNorm({mu},{sigma}) = exp($\\mathcal{{N}}$({mu},{sigma}))')
    ax2.set_xlim(0, 10)

Choosing priors for the degrees of freedom parameter#

# FIGURES ONLY
from scipy.stats import gamma
from scipy.stats import expon
from ministats import plot_pdf

with plt.rc_context({"figure.figsize":(6,2.2)}):
    fig, (ax1, ax2) = plt.subplots(1,2, sharey=True)

    # PLOT 1
    alpha, beta = 2, 0.1
    rv_nu = gamma(a=alpha, scale=1/beta)
    plot_pdf(rv_nu, rv_name="ν", ax=ax1)
    ax1.set_title(f'(a) Gamma($\\alpha=${alpha}, $\\beta=${beta})')
    ax1.set_xlim(0, 100)

    # PLOT 2
    scale = 30
    rv_nu_exp = expon(scale=scale)
    plot_pdf(rv_nu_exp, rv_name="ν", ax=ax2)
    ax2.set_title(f'(b) Expon($\\lambda=$1/{scale})')
    ax2.set_xlim(0, 100)
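
As a quick sanity check on choice (a), we can confirm that the Gamma(α=2, β=0.1) prior has mean α/β = 20 and places most of its mass on moderate values of ν (a small verification that reuses the rv_nu object defined above):

# Prior mean and central 90% interval of nu under Gamma(2, 0.1)
rv_nu.mean(), rv_nu.ppf([0.05, 0.95])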

Example 1: comparing electricity prices#

We compare electricity prices collected in the East End and the West End.

Electricity prices dataset#

eprices = pd.read_csv("../datasets/eprices.csv")
eprices.groupby("loc")["price"].describe()
loc   count      mean       std  min  25%  50%   75%   max
East    9.0  6.155556  0.877655  4.8  5.5  6.3   6.5   7.7
West    9.0  9.155556  1.562139  6.8  8.3  8.6  10.0  11.8
eprices["price"].mean(), eprices["price"].std()
(7.655555555555556, 1.973120020267162)
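
Before fitting the Bayesian model, it is worth noting the observed difference between the sample means, 9.156 − 6.156 = 3.0, which the posterior for dmeans should roughly agree with. A quick check using pandas:

# Observed difference between the West End and East End sample means
means = eprices.groupby("loc")["price"].mean()
means["West"] - means["East"]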

Bayesian model#

\[\begin{align*} X_W \sim \mathcal{T}(\nu, M_W, \Sigma_W), \qquad \qquad X_E \sim \mathcal{T}(\nu, M_E, \Sigma_E), \qquad\qquad \\ M_W, M_E \!\sim\! \mathcal{N}(\mu_{M}, \sigma_{M}), \quad \log\Sigma_W, \log\Sigma_E\!\sim\!\mathcal{N}(0,1), \quad \nu \!\sim\! \text{Gamma}(2,0.1). \end{align*}\]

Bambi model#

formula1 = bmb.Formula("price ~ 0 + loc",
                       "sigma ~ 0 + loc")

links1 = {"mu":"identity",
          "sigma":"log"}

priors1 = {
    "loc": bmb.Prior("Normal", mu=8, sigma=5),
    "sigma": {
        "loc": bmb.Prior("Normal", mu=0, sigma=1)
    },
    "nu": bmb.Prior("Gamma", alpha=2, beta=0.1),
}

mod1 = bmb.Model(formula=formula1,
                 family="t",
                 link=links1,
                 priors=priors1,
                 data=eprices)
mod1
       Formula: price ~ 0 + loc
                sigma ~ 0 + loc
        Family: t
          Link: mu = identity
                sigma = log
  Observations: 18
        Priors: 
    target = mu
        Common-level effects
            loc ~ Normal(mu: 8.0, sigma: 5.0)
        
        Auxiliary parameters
            nu ~ Gamma(alpha: 2.0, beta: 0.1)
    target = sigma
        Common-level effects
            sigma_loc ~ Normal(mu: 0.0, sigma: 1.0)
mod1.build()
mod1.graph()
(model graph for mod1)

Prior predictive checks#

# TODO
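
One possible way to carry out a prior predictive check is to draw outcome samples implied by the priors alone using Bambi's prior_predictive method, then plot their distribution (a sketch; the number of draws and the plotting choices are arbitrary):

# Sample the outcome variable implied by the priors alone
idata1_prior = mod1.prior_predictive(draws=500, random_seed=42)
prior_prices = idata1_prior["prior_predictive"]["price"].values.flatten()
sns.histplot(x=prior_prices, stat="density")
plt.xlim([-30, 50]);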

Model fitting and analysis#

idata1 = mod1.fit(random_seed=[42,43,44,45])
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [nu, loc, sigma_loc]

Sampling 2 chains for 1_000 tune and 1_000 draw iterations (2_000 + 2_000 draws total) took 2 seconds.
We recommend running at least 4 chains for robust computation of convergence diagnostics

Processing the results#

Calculate derived quantities used for the analysis plots and summaries.

post1 = idata1["posterior"]

# Calculate sigmas from log-sigmas
#######################################################
logsig_W = post1["sigma_loc"].sel(sigma_loc_dim="West")
logsig_E = post1["sigma_loc"].sel(sigma_loc_dim="East")
post1["sigma_West"] = np.exp(logsig_W)
post1["sigma_East"] = np.exp(logsig_E)

# Calculate the difference between means
post1["mu_West"] = post1["loc"].sel(loc_dim="West")
post1["mu_East"] = post1["loc"].sel(loc_dim="East")
post1["dmeans"] = post1["mu_West"] - post1["mu_East"]

# Calculate the difference between standard deviations
post1["dstd"] = post1["sigma_West"] - post1["sigma_East"]

# Effect size
#######################################################
pvar = (post1["sigma_West"]**2 + post1["sigma_East"]**2) / 2
post1["cohend"] = post1["dmeans"] / np.sqrt(pvar)
# #ALT: use helper function
# from ministats import calc_dmeans_stats
# calc_dmeans_stats(idata1, group_name="loc")
az.summary(idata1, kind="stats", hdi_prob=0.90,
           var_names=["mu_West", "mu_East", "dmeans", "sigma_West", "sigma_East", "dstd", "nu", "cohend"])
              mean      sd  hdi_5%  hdi_95%
mu_West      9.123   0.587   8.233   10.093
mu_East      6.150   0.302   5.695    6.687
dmeans       2.973   0.650   1.940    4.064
sigma_West   1.589   0.492   0.847    2.261
sigma_East   0.904   0.260   0.513    1.267
dstd         0.685   0.564  -0.177    1.576
nu          20.687  13.481   2.018   39.852
cohend       2.379   0.686   1.250    3.436
from ministats import plot_dmeans_stats
plot_dmeans_stats(mod1, idata1, group_name="loc");
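
Because we have the full posterior, we can also answer direct probability questions, for example the posterior probability that the West End mean price exceeds the East End mean price, computed from the dmeans samples created above:

# Posterior probability that mu_West > mu_East
prob_west_higher = (post1["dmeans"] > 0).mean().item()
prob_west_higher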

Compare to frequentist results#

from scipy.stats import ttest_ind
pricesW = eprices[eprices["loc"]=="West"]["price"]
pricesE = eprices[eprices["loc"]=="East"]["price"]

res1 = ttest_ind(pricesW, pricesE, equal_var=False)
res1.statistic, res1.pvalue
(5.022875513276464, 0.0002570338337217609)
res1.confidence_interval(confidence_level=0.9)
ConfidenceInterval(low=1.9396575883681466, high=4.060342411631854)
from ministats import cohend2
cohend2(pricesW, pricesE)
2.3678062243290983

Conclusions#

Example 2: comparing IQ scores#

We'll look at IQ scores data taken from the paper Bayesian Estimation Supersedes the t-Test (BEST) by John K. Kruschke.

A smart drug was administered to the treatment group, and we want to compare its IQ scores to those of the control group. Note that the data contains outliers.

cf. compare_iqs2_many_ways.ipynb

IQ scores dataset#

iqs2 = pd.read_csv("../datasets/iqs2.csv")
iqs2.groupby("group")["iq"].describe()
group  count        mean       std   min    25%    50%    75%    max
ctrl    42.0  100.357143  2.516496  88.0  100.0  100.5  101.0  105.0
treat   47.0  101.914894  6.021085  82.0  100.0  102.0  103.0  124.0
sns.stripplot(data=iqs2, x="iq", y="group", hue="group");

Bayesian model#

The model has the same structure as in Example 1, with the priors adapted to the IQ scale (matching the Bambi priors specified below):
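
\[\begin{align*} X_T \sim \mathcal{T}(\nu, M_T, \Sigma_T), \qquad \qquad X_C \sim \mathcal{T}(\nu, M_C, \Sigma_C), \qquad\qquad \\ M_T, M_C \!\sim\! \mathcal{N}(100, 35), \quad \log\Sigma_T, \log\Sigma_C \!\sim\! \mathcal{N}(1,2), \quad \nu \!\sim\! \text{Gamma}(2,0.1), \end{align*}\]

where the subscripts T and C denote the treatment and control groups.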

Bambi model#

formula2 = bmb.Formula("iq ~ 0 + group",
                       "sigma ~ 0 + group")

priors2 = {
    "group": bmb.Prior("Normal", mu=100, sigma=35),
    "sigma": {
        "group": bmb.Prior("Normal", mu=1, sigma=2)
    },
    "nu": bmb.Prior("Gamma", alpha=2, beta=0.1),
}


mod2 = bmb.Model(formula=formula2,
                 family="t",
                 # link={"mu":"identity", "sigma":"log"}, # Bambi defaults
                 priors=priors2,
                 data=iqs2)

mod2
       Formula: iq ~ 0 + group
                sigma ~ 0 + group
        Family: t
          Link: mu = identity
                sigma = log
  Observations: 89
        Priors: 
    target = mu
        Common-level effects
            group ~ Normal(mu: 100.0, sigma: 35.0)
        
        Auxiliary parameters
            nu ~ Gamma(alpha: 2.0, beta: 0.1)
    target = sigma
        Common-level effects
            sigma_group ~ Normal(mu: 1.0, sigma: 2.0)
mod2.build()
mod2.graph()
(model graph for mod2)

Model fitting and analysis#

idata2 = mod2.fit(random_seed=[42,43,44,45])
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [nu, group, sigma_group]

Sampling 2 chains for 1_000 tune and 1_000 draw iterations (2_000 + 2_000 draws total) took 2 seconds.
We recommend running at least 4 chains for robust computation of convergence diagnostics
from ministats import calc_dmeans_stats
calc_dmeans_stats(idata2, group_name="group");
az.summary(idata2, kind="stats", hdi_prob=0.95,
           var_names=["mu_treat", "mu_ctrl", "dmeans", "sigma_treat", "sigma_ctrl", "dstd", "nu", "cohend"])
                mean     sd  hdi_2.5%  hdi_97.5%
mu_treat     101.539  0.384   100.773    102.286
mu_ctrl      100.523  0.200   100.130    100.919
dmeans         1.016  0.432     0.122      1.840
sigma_treat    2.008  0.430     1.242      2.908
sigma_ctrl     1.030  0.204     0.658      1.416
dstd           0.977  0.442     0.079      1.813
nu             1.862  0.488     1.051      2.857
cohend         0.657  0.303     0.074      1.280
from ministats import plot_dmeans_stats
plot_dmeans_stats(mod2, idata2, group_name="group", ppc_xlims=[82,124]);
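
As in Example 1, we can compute the posterior probability that the treatment group's mean IQ exceeds the control group's mean, using the dmeans variable added by calc_dmeans_stats:

# Posterior probability that mu_treat > mu_ctrl
post2 = idata2["posterior"]
(post2["dmeans"] > 0).mean().item()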

Compare to frequentist results#

from scipy.stats import ttest_ind

treated = iqs2[iqs2["group"]=="treat"]["iq"].values
controls = iqs2[iqs2["group"]=="ctrl"]["iq"].values

res2 = ttest_ind(treated, controls, equal_var=False)
res2.statistic, res2.pvalue
(1.622190457290228, 0.10975381983712831)
res2.confidence_interval(confidence_level=0.95)
ConfidenceInterval(low=-0.36118477236636837, high=3.4766862921232016)
from ministats import cohend2
cohend2(treated, controls)
0.3309654530139732
# MAYBE
# # Test if the variances of the two groups are the same
# from scipy.stats import levene
# levene(treated, controls)

Conclusions#

Explanations#

Alternative choices of priors#

For example, we could use a very wide prior for sigma and ν ∼ Expon(1/29) + 1, as in Bayesian Estimation Supersedes the t-Test (BEST) by John K. Kruschke.

If we use a model with a common standard deviation for the two groups, we obtain the equivalent of the pooled standard deviation from the classical two-sample t-test.
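
Here is a rough sketch of what these alternatives could look like in Bambi. The prior values below are illustrative rather than Kruschke's exact BEST specification (in particular, the +1 shift on ν is omitted), and the common-sigma variant simply drops the sigma formula so that both groups share a single standard deviation:

# BEST-like priors: very wide priors on the group means and nu ~ Expon(1/29)
priors_best = {
    "loc": bmb.Prior("Normal", mu=8, sigma=50),            # very wide prior on means
    "sigma": {"loc": bmb.Prior("Normal", mu=0, sigma=3)},   # wide prior on log-sigma
    "nu": bmb.Prior("Exponential", lam=1/29),               # note: no +1 shift here
}
mod1_best = bmb.Model(formula1, family="t", link=links1,
                      priors=priors_best, data=eprices)

# Common-sigma model: no sigma formula, so a single sigma is shared by both
# groups and plays the role of the pooled standard deviation
mod1_pooled = bmb.Model("price ~ 0 + loc", family="t", data=eprices)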

Sensitivity analysis and robustness checks#

# TODO

Performance tests#

Simulated datasets#

Simulate a grid of datasets covering combinations of the effect size Δ, the proportion of outliers, and the sample sizes n and m; a possible helper function is sketched below.
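
A possible sketch of such a simulation helper (the function name and defaults are hypothetical; outliers are modelled by replacing a fraction of the second group's values with draws from a much wider normal distribution):

def simulate_two_groups(n=30, m=30, delta=1.0, outlier_frac=0.0, seed=None):
    """Simulate a two-group dataset with true mean difference `delta`
    and a fraction `outlier_frac` of outliers in the second group."""
    rng = np.random.default_rng(seed)
    xs = rng.normal(loc=0, scale=1, size=n)
    ys = rng.normal(loc=delta, scale=1, size=m)
    n_out = int(outlier_frac * m)
    if n_out > 0:
        ys[:n_out] = rng.normal(loc=delta, scale=10, size=n_out)
    groups = ["x"] * n + ["y"] * m
    return pd.DataFrame({"group": groups, "val": np.concatenate([xs, ys])})

# Example: one simulated dataset from the grid
simulate_two_groups(n=20, m=30, delta=1.5, outlier_frac=0.1, seed=42).head()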

Model comparisons#

Compare the results of several models (a LOO comparison of the first two is sketched after this list):

  • N model

  • T model (robust)

  • BayesFactor based on JZS prior

  • Classical two-sample t-test (from Section 4.5)
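
For the first two candidates, one way to compare the Normal and Student-t data models is LOO cross-validation via az.compare. This is a sketch: it assumes both models are refit with the pointwise log-likelihood stored in the trace, reusing the eprices data from Example 1.

# Refit both data models, keeping the pointwise log-likelihood for LOO
mod1_n = bmb.Model(formula1, family="gaussian", data=eprices)
idata_n = mod1_n.fit(idata_kwargs={"log_likelihood": True}, random_seed=42)
idata_t = mod1.fit(idata_kwargs={"log_likelihood": True}, random_seed=42)
az.compare({"N model": idata_n, "T model": idata_t})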

Discussion#

Comparison to the frequentist two-sample t-test#

  • results are numerically similar

  • note that we're using the 𝒯 distribution as the data model, not as a sampling distribution

  • conceptually different:

    • p-value vs. decision based on posterior distribution

    • confidence intervals vs. credible intervals

Comparing multiple groups#

  • The approach extends naturally to comparisons between multiple groups, which leads to Bayesian ANOVA.

Forward reference: hierarchical models for group comparisons are covered in Section 5.5.

Exercises#

Exercise 1: small samples#

As = [5.77, 5.33, 4.59, 4.33, 3.66, 4.48]
Bs = [3.88, 3.55, 3.29, 2.59, 2.33, 3.59]
groups = ["A"]*len(As) + ["B"]*len(Bs)
df1 = pd.DataFrame({"group": groups, "vals": As + Bs})
# df1

Exercise 2: lecture and debate curriculums#

students = pd.read_csv("../datasets/students.csv")
# students.groupby("curriculum")["score"].describe()
formula_std = bmb.Formula("score ~ 0 + curriculum",
                          "sigma ~ 0 + curriculum")
priors_std = {
    "curriculum": bmb.Prior("Normal", mu=70, sigma=30),
    "sigma": {
        "curriculum":bmb.Prior("Normal", mu=1, sigma=2)
    },
    "nu": bmb.Prior("Gamma", alpha=2, beta=0.1),
}
mod_std = bmb.Model(formula=formula_std,
                 family="t",
                 link="identity",
                 priors=priors_std,
                 data=students)
mod_std
# mod_std.build()
# mod_std.graph()
       Formula: score ~ 0 + curriculum
                sigma ~ 0 + curriculum
        Family: t
          Link: mu = identity
                sigma = log
  Observations: 15
        Priors: 
    target = mu
        Common-level effects
            curriculum ~ Normal(mu: 70.0, sigma: 30.0)
        
        Auxiliary parameters
            nu ~ Gamma(alpha: 2.0, beta: 0.1)
    target = sigma
        Common-level effects
            sigma_curriculum ~ Normal(mu: 1.0, sigma: 2.0)
idata_std = mod_std.fit()
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [nu, curriculum, sigma_curriculum]

Sampling 2 chains for 1_000 tune and 1_000 draw iterations (2_000 + 2_000 draws total) took 2 seconds.
We recommend running at least 4 chains for robust computation of convergence diagnostics
# Calculate the difference between means
currs = idata_std["posterior"]["curriculum"]
mu_debate = currs.sel(curriculum_dim="debate")
mu_lecture = currs.sel(curriculum_dim="lecture")
idata_std["posterior"]["dmeans"] = mu_debate - mu_lecture
az.summary(idata_std, kind="stats", var_names=["dmeans"], hdi_prob=0.95)
         mean     sd  hdi_2.5%  hdi_97.5%
dmeans  7.399  5.258    -2.861     17.366
# from ministats import plot_dmeans_stats
# plot_dmeans_stats(mod_std, idata_std, ppc_xlims=[50,100], group_name="curriculum");
# from scipy.stats import ttest_ind
# scoresD = students[students["curriculum"]=="debate"]["score"]
# scoresL = students[students["curriculum"]=="lecture"]["score"]
# res_std = ttest_ind(scoresD, scoresL, equal_var=False)
# res_std.statistic, res_std.pvalue
# from ministats import cohend2
# cohend2(scoresL, scoresD)

Exercise 3: redo exercises from Section 3.5 using Bayesian methods#

# TODO

BONUS Examples#

Example 4: small example from the BEST vignette#

See http://cran.nexr.com/web/packages/BEST/vignettes/BEST.pdf#page=2

y1s = [5.77, 5.33, 4.59, 4.33, 3.66, 4.48]
y2s = [3.88, 3.55, 3.29, 2.59, 2.33, 3.59]

from ministats.bayes import bayes_dmeans
mod4, idata4 = bayes_dmeans(y1s, y2s, groups=["y1", "y2"])
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [nu, Intercept, group, sigma_group]

Sampling 2 chains for 1_000 tune and 2_000 draw iterations (2_000 + 4_000 draws total) took 3 seconds.
We recommend running at least 4 chains for robust computation of convergence diagnostics
from ministats import calc_dmeans_stats
calc_dmeans_stats(idata4)
az.summary(idata4, kind="stats", hdi_prob=0.95,
           var_names=["dmeans", "sigma_y1", "sigma_y2", "dstd", "nu", "cohend"])
            mean      sd  hdi_2.5%  hdi_97.5%
dmeans    -1.436   0.477    -2.433     -0.549
sigma_y1   0.852   0.340     0.368      1.530
sigma_y2   0.719   0.268     0.327      1.282
dstd      -0.133   0.428    -0.960      0.748
nu        21.391  14.443     1.349     49.059
cohend    -1.911   0.767    -3.497     -0.556
# from ministats import plot_dmeans_stats
# plot_dmeans_stats(mod4, idata4, ppc_xlims=None);

Example 5: comparing morning to evening#

This example is based on the treszkai/best repository.

morning = [8.99, 9.21, 9.03, 9.15, 8.68, 8.82, 8.66, 8.82, 8.59, 8.14,
           9.09, 8.80, 8.18, 9.23, 8.55, 9.03, 9.36, 9.06, 9.57, 8.38]
evening = [9.82, 9.34, 9.73, 9.93, 9.33, 9.41, 9.48, 9.14, 8.62, 8.60,
           9.60, 9.41, 8.43, 9.77, 8.96, 9.81, 9.75, 9.50, 9.90, 9.13]
from ministats.bayes import bayes_dmeans
mod5, idata5 = bayes_dmeans(evening, morning, groups=["evening", "morning"])
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [nu, Intercept, group, sigma_group]

Sampling 2 chains for 1_000 tune and 2_000 draw iterations (2_000 + 4_000 draws total) took 3 seconds.
We recommend running at least 4 chains for robust computation of convergence diagnostics
from ministats import calc_dmeans_stats
calc_dmeans_stats(idata5)
az.summary(idata5, kind="stats", hdi_prob=0.95,
           var_names=["dmeans", "sigma_evening", "sigma_morning", "dstd", "nu", "cohend"])
                 mean      sd  hdi_2.5%  hdi_97.5%
dmeans         -0.528   0.141    -0.808     -0.256
sigma_evening   0.455   0.086     0.307      0.620
sigma_morning   0.389   0.074     0.249      0.536
dstd           -0.066   0.112    -0.279      0.157
nu             23.069  14.566     3.229     51.788
cohend         -1.262   0.377    -2.015     -0.535
# from ministats import plot_dmeans_stats
# plot_dmeans_stats(mod5, idata5, ppc_xlims=None);