Section 5.4 — Bayesian difference between means#

This notebook contains the code examples from Section 5.4 Bayesian difference between means from the No Bullshit Guide to Statistics.

See also:

Notebook setup#

# load Python modules
import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import bambi as bmb
import arviz as az
WARNING (pytensor.tensor.blas): Using NumPy C-API based implementation for BLAS functions.
# Figures setup
plt.clf()  # needed otherwise `sns.set_theme` doesn"t work
from plot_helpers import RCPARAMS
RCPARAMS.update({"figure.figsize": (5, 3)})   # good for screen
# RCPARAMS.update({"figure.figsize": (5, 1.6)})  # good for print
sns.set_theme(
    context="paper",
    style="whitegrid",
    palette="colorblind",
    rc=RCPARAMS,
)

# High-resolution please
%config InlineBackend.figure_format = "retina"

# Where to store figures
from ministats.utils import savefigure
DESTDIR = "figures/bayes/dmeans"
<Figure size 640x480 with 0 Axes>
# set random seed for repeatability
np.random.seed(42)
#######################################################

Introduciton#

We’ll now revisit the problem of comparison between two groups from a Bayesian perspective.

Model#

The data models for the two groups are based on Student’s \(t\)-distribution:

\[ X \sim \mathcal{T}(\nu, M_X, \Sigma_X) \qquad \text{and} \qquad Y \sim \mathcal{T}(\nu, M_Y, \Sigma_Y). \]

We’ll use a normal prior for the means, half-\(t\) priors for standard deviations, and a gamma distribution for the degrees of freedom parameter:

\[ M_X, M_Y \sim \mathcal{N}(\mu_{M}, \sigma_{M}), \quad \Sigma_X, \Sigma_Y \sim \mathcal{T}^+\!(\nu_{\Sigma}, \sigma_{\Sigma}), \quad \nu \sim \textrm{Gamma}(\alpha,\beta). \]

Bambi formula objects#

import bambi as bmb
formula = bmb.Formula("y ~ 0 + group",
                      "sigma ~ 0 + group")
formula
Formula('y ~ 0 + group', 'sigma ~ 0 + group')
formula.main, formula.additionals
('y ~ 0 + group', ('sigma ~ 0 + group',))
formula.get_all_formulas()
['y ~ 0 + group', 'sigma ~ 0 + group']

Choosing priors for log-sigma#

Normal distribution for log-sigma#

# FIGURES ONLY
from scipy.stats import norm

def get_lognormal(mu=0, sigma=1):
    logsigs = np.linspace(-5, 5, 1000)
    dlogsigs = norm(mu, sigma).pdf(logsigs)
    sigmas = np.exp(logsigs)
    # Apply the change of variables to get the density for sigma
    # based on the Jacobian |d(log(sigma))/d(sigma)| = 1/sigma
    dsigmas = dlogsigs / sigmas
    return sigmas, dsigmas


with plt.rc_context({"figure.figsize":(6,2.2)}):
    fig, (ax1, ax2) = plt.subplots(1,2, sharey=True)

    # PLOT 1
    mu, sigma = 0, 1
    sigmas, dsigmas = get_lognormal(mu, sigma)
    sns.lineplot(x=sigmas, y=dsigmas, ax=ax1)
    ax1.set_xlabel('$\\sigma$')
    ax1.set_ylabel('density')
    ax1.set_title(f'(a) LogNorm({mu},{sigma}) = exp($\\mathcal{{N}}$({mu},{sigma}))')
    ax1.set_xlim(0, 10)
    ax1.set_ylim(0, 0.7)

    # PLOT 2
    mu, sigma = 1, 2
    sigmas, dsigmas = get_lognormal(mu, sigma)
    sns.lineplot(x=sigmas, y=dsigmas, ax=ax2)
    ax2.set_xlabel('$\\sigma$')
    ax2.set_ylabel('density')
    ax2.set_title(f'(b) LogNorm({mu},{sigma}) = exp($\\mathcal{{N}}$({mu},{sigma}))')
    ax2.set_xlim(0, 10)
    # ax2.set_ylim(0, 0.7)

    # filename = os.path.join(DESTDIR, "log_sigma_normal_examples.pdf")
    # savefigure(fig, filename)
../_images/9d3bbf15768b65e7ccad0e96d479379a05d3a9eb594730657052424e7752d1a2.png

Choosing priors for the degrees of freedom parameter#

# FIGURES ONLY
from scipy.stats import gamma
from scipy.stats import expon
from ministats import plot_pdf

with plt.rc_context({"figure.figsize":(6,2.2)}):
    fig, (ax1, ax2) = plt.subplots(1,2, sharey=True)

    # PLOT 1
    alpha, beta = 2, 0.1
    rv_nu = gamma(a=2, scale=1/beta)
    plot_pdf(rv_nu, rv_name="ν", ax=ax1)
    ax1.set_title(f'(a) Gamma($\\alpha=${alpha}, $\\beta=${beta})')
    ax1.set_xlim(0, 100)

    # PLOT 2
    scale = 30
    rv_nu_exp = expon(scale=scale)
    plot_pdf(rv_nu_exp, rv_name="ν", ax=ax2)
    ax2.set_title(f'(b) Expon($\\lambda=$1/{scale})')
    ax2.set_xlim(0, 100)

    # filename = os.path.join(DESTDIR, "gamma_and_exp_examples.pdf")
    # savefigure(fig, filename)
../_images/cc5a57219b28d7fc4c4d85a5815266d01d8b7025280504e990a0f82e77f3a60d.png

Example 1: comparing electricity prices#

Electricity prices from East End and West End

Electricity prices dataset#

eprices = pd.read_csv("../datasets/eprices.csv")
eprices.groupby("loc")["price"].describe()
count mean std min 25% 50% 75% max
loc
East 9.0 6.155556 0.877655 4.8 5.5 6.3 6.5 7.7
West 9.0 9.155556 1.562139 6.8 8.3 8.6 10.0 11.8
eprices["price"].mean(), eprices["price"].std()
(7.655555555555556, 1.973120020267162)

Bayesian model#

\[\begin{align*} X_W \sim \mathcal{T}(\nu, M_W, \Sigma_W), \qquad \qquad X_E \sim \mathcal{T}(\nu, M_E, \Sigma_E), \qquad\qquad \\ M_W, M_E \!\sim\! \mathcal{N}(\mu_{M}, \sigma_{M}), \quad \log\Sigma_W, \log\Sigma_E\!\sim\!\mathcal{N}(0,1), \quad \nu \!\sim\! \text{Gamma}(2,0.1). \end{align*}\]

Bambi model#

formula1 = bmb.Formula("price ~ 0 + loc",
                       "sigma ~ 0 + loc")

links1 = {"mu":"identity",
          "sigma":"log"}

priors1 = {
    "loc": bmb.Prior("Normal", mu=8, sigma=5),
    "sigma": {
        "loc": bmb.Prior("Normal", mu=0, sigma=1)
    },
    "nu": bmb.Prior("Gamma", alpha=2, beta=0.1),
}

mod1 = bmb.Model(formula=formula1,
                 family="t",
                 link=links1,
                 priors=priors1,
                 data=eprices)
mod1
       Formula: price ~ 0 + loc
                sigma ~ 0 + loc
        Family: t
          Link: mu = identity
                sigma = log
  Observations: 18
        Priors: 
    target = mu
        Common-level effects
            loc ~ Normal(mu: 8.0, sigma: 5.0)
        
        Auxiliary parameters
            nu ~ Gamma(alpha: 2.0, beta: 0.1)
    target = sigma
        Common-level effects
            sigma_loc ~ Normal(mu: 0.0, sigma: 1.0)
mod1.build()
mod1.graph()

# # FIGURES ONLY
# filename = os.path.join(DESTDIR, "example1_eprices_mod1_graph")
# mod1.graph(name=filename, fmt="png", dpi=300)
../_images/d66df3f8e4d6000af49e7f78299b582070d437c86c99b57672971a4e1513f424.svg

Prior predictive checks#

# TODO

Model fitting and analysis#

idata1 = mod1.fit(random_seed=[42,43,44,45])
/opt/hostedtoolcache/Python/3.10.16/x64/lib/python3.10/site-packages/pymc/sampling/mcmc.py:736: UserWarning: A list or tuple of random_seed no longer specifies the specific random_seed of each chain. Use a single seed instead.
  warnings.warn(
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [nu, loc, sigma_loc]

Sampling 2 chains for 1_000 tune and 1_000 draw iterations (2_000 + 2_000 draws total) took 2 seconds.
We recommend running at least 4 chains for robust computation of convergence diagnostics

Processing the results#

Calculate derived quantities used for the analysis plots and summaries.

post1 = idata1["posterior"]

# Calculate sigmas from log-sigmas
#######################################################
logsig_W = post1["sigma_loc"].sel(sigma_loc_dim="West")
logsig_E = post1["sigma_loc"].sel(sigma_loc_dim="East")
post1["sigma_West"] = np.exp(logsig_W)
post1["sigma_East"] = np.exp(logsig_E)

# Calculate the difference between between means
post1["mu_West"] = post1["loc"].sel(loc_dim="West")
post1["mu_East"] = post1["loc"].sel(loc_dim="East")
post1["dmeans"] = post1["mu_West"] - post1["mu_East"]

# Calculate the difference between standard deviations
post1["dstd"] = post1["sigma_West"]-post1["sigma_East"]

# Effect size
#######################################################
pvar =(post1["sigma_West"]**2+post1["sigma_East"]**2)/2
post1["cohend"] = post1["dmeans"] / np.sqrt(pvar)
# #ALT: use helper function
# from ministats import calc_dmeans_stats
# calc_dmeans_stats(idata1, group_name="loc")
az.summary(idata1, kind="stats", hdi_prob=0.90,
           var_names=["mu_West", "mu_East", "dmeans", "sigma_West", "sigma_East", "dstd", "nu", "cohend"])
mean sd hdi_5% hdi_95%
mu_West 9.099 0.567 8.142 9.955
mu_East 6.154 0.303 5.702 6.691
dmeans 2.945 0.633 1.868 3.873
sigma_West 1.571 0.459 0.924 2.267
sigma_East 0.919 0.267 0.544 1.352
dstd 0.653 0.536 -0.175 1.521
nu 20.798 13.632 2.225 39.563
cohend 2.368 0.704 1.159 3.403
from ministats import plot_dmeans_stats
plot_dmeans_stats(mod1, idata1, group_name="loc");
../_images/2e3ce6c938111f63fa4d0f6d693f9391ec1aa54f7b93fdf801413ab5915e2ced.png
# # FIGURES ONLY
# from ministats import plot_dmeans_stats
# plot_dmeans_stats(mod1, idata1, group_name="loc",
#                   figsize=(6,6.5), ppc_xlims=[2,15], ppc_ylims=[0,0.7]);
# filename = os.path.join(DESTDIR, "example1_dmeans_plots.pdf")
# savefigure(plt.gcf(), filename)

Compare to frequentist results#

from scipy.stats import ttest_ind
pricesW = eprices[eprices["loc"]=="West"]["price"]
pricesE = eprices[eprices["loc"]=="East"]["price"]

res1 = ttest_ind(pricesW, pricesE, equal_var=False)
res1.statistic, res1.pvalue
(5.022875513276464, 0.0002570338337217609)
res1.confidence_interval(confidence_level=0.9)
ConfidenceInterval(low=1.9396575883681466, high=4.060342411631854)
from ministats import cohend2
cohend2(pricesW, pricesE)
2.3678062243290983

Conclusions#

Example 2: comparing IQ scores#

We’ll look at IQ scores data taken from a the paper Bayesian Estimation Supersedes the t-Test (BEST) by John K. Kruschke.

smart drug administered to treatment group and want to compare to control group. Data contains outliers)

cf. compare_iqs2_many_ways.ipynb

IQ scores dataset#

iqs2 = pd.read_csv("../datasets/exercises/iqs2.csv")
iqs2.groupby("group")["iq"].describe()
count mean std min 25% 50% 75% max
group
ctrl 42.0 100.357143 2.516496 88.0 100.0 100.5 101.0 105.0
treat 47.0 101.914894 6.021085 82.0 100.0 102.0 103.0 124.0
sns.stripplot(data=iqs2, x="iq", y="group", hue="group");
../_images/87e9fbf65e091e13c25c8f2553f33a1367fb5a7c6ebe709dd31a04617c4ce445.png
# # FIUGRES ONLY
# with plt.rc_context({"figure.figsize":(6,2)}):
#     ax = sns.stripplot(data=iqs2, x="iq", y="group", hue="group", jitter=0.4, alpha=0.5);
#     ax.set_xticks( range(80,130,5) )
#     filename = os.path.join(DESTDIR, "example2_iqs2_stripplot.pdf")
#     savefigure(ax, filename)

Bayesian model#

TODO: add formulas

Bambi model#

formula2 = bmb.Formula("iq ~ 0 + group",
                       "sigma ~ 0 + group")

priors2 = {
    "group": bmb.Prior("Normal", mu=100, sigma=35),
    "sigma": {
        "group": bmb.Prior("Normal", mu=1, sigma=2)
    },
    "nu": bmb.Prior("Gamma", alpha=2, beta=0.1),
}


mod2 = bmb.Model(formula=formula2,
                 family="t",
                 # link={"mu":"identity", "sigma":"log"}, # Bambi defaults
                 priors=priors2,
                 data=iqs2)

mod2
       Formula: iq ~ 0 + group
                sigma ~ 0 + group
        Family: t
          Link: mu = identity
                sigma = log
  Observations: 89
        Priors: 
    target = mu
        Common-level effects
            group ~ Normal(mu: 100.0, sigma: 35.0)
        
        Auxiliary parameters
            nu ~ Gamma(alpha: 2.0, beta: 0.1)
    target = sigma
        Common-level effects
            sigma_group ~ Normal(mu: 1.0, sigma: 2.0)
mod2.build()
mod2.graph()

# # FIGURES ONLY
# filename = os.path.join(DESTDIR, "example2_iqs_mod2_graph")
# mod2.graph(name=filename, fmt="png", dpi=300)
../_images/3b49afdf084c20d5901351723cc76c243e9d375534670865fc31b8486079d8bd.svg

Model fitting and analysis#

idata2 = mod2.fit(random_seed=[42,43,44,45])
/opt/hostedtoolcache/Python/3.10.16/x64/lib/python3.10/site-packages/pymc/sampling/mcmc.py:736: UserWarning: A list or tuple of random_seed no longer specifies the specific random_seed of each chain. Use a single seed instead.
  warnings.warn(
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [nu, group, sigma_group]

Sampling 2 chains for 1_000 tune and 1_000 draw iterations (2_000 + 2_000 draws total) took 2 seconds.
We recommend running at least 4 chains for robust computation of convergence diagnostics
from ministats import calc_dmeans_stats
calc_dmeans_stats(idata2, group_name="group");
az.summary(idata2, kind="stats", hdi_prob=0.95,
           var_names=["mu_treat", "mu_ctrl", "dmeans", "sigma_treat", "sigma_ctrl", "dstd", "nu", "cohend"])
mean sd hdi_2.5% hdi_97.5%
mu_treat 101.530 0.382 100.824 102.323
mu_ctrl 100.523 0.196 100.136 100.912
dmeans 1.007 0.432 0.144 1.874
sigma_treat 2.022 0.423 1.242 2.868
sigma_ctrl 1.033 0.198 0.670 1.417
dstd 0.988 0.434 0.148 1.813
nu 1.870 0.471 1.080 2.793
cohend 0.646 0.297 0.057 1.225
from ministats import plot_dmeans_stats
plot_dmeans_stats(mod2, idata2, group_name="group", ppc_xlims=[82,124]);
../_images/d9e26e54fb040cae06cb46e384a0f4662a4a4042cb82ae7ec1d20a689d442c8d.png
# # FIGURES ONLY
# plot_dmeans_stats(mod2, idata2, group_name="group",
#                   figsize=(6,6.5), ppc_xlims=[82,124], ppc_ylims=[0,0.3]);
# filename = os.path.join(DESTDIR, "example2_dmeans_plots.pdf")
# savefigure(plt.gcf(), filename)

Compare to frequentist results#

from scipy.stats import ttest_ind

treated = iqs2[iqs2["group"]=="treat"]["iq"].values
controls = iqs2[iqs2["group"]=="ctrl"]["iq"].values

res2 = ttest_ind(treated, controls, equal_var=False)
res2.statistic, res2.pvalue
(1.622190457290228, 0.10975381983712831)
res2.confidence_interval(confidence_level=0.95)
ConfidenceInterval(low=-0.36118477236636837, high=3.4766862921232016)
from ministats import cohend2
cohend2(treated, controls)
0.3309654530139732
# MAYBE
# # Test if the variances of the two groups are the same
# from scipy.stats import levene
# levene(treated, controls)

Conclusions#

Explanations#

Alternative choices of priors#

e.g. prior for sigma as very wiiiiiide, and nu ~ Exp(1/29)+1 → BEST Bayesian estimation supersedes the t-test by John K. Kruschke

If we use a model with common standard deviation, we get equivalent of pooled sigma

Sensitivity analysis and robustness checks#

# TODO

Performance tests#

Simulated datasets#

Simulate data grid consisting of ∆ * outliers * n * m

Model comparisons#

Compare results several models:

  • N model

  • T model (robust)

  • BayesFactor based on JZS prior

  • Classical two-sample t-test (from Section 4.5)

Discussion#

Comparison to the frequentist two-sample t-test#

  • results numerically similar

  • note we’re using 𝒯 as data model, not as a sampling distribution

  • conceptually different:

    • p-value vs. decision based on posterior distribution

    • confidence intervals vs. credible intervals

Comparing multiple groups#

  • ?Extension to Bayesian ANOVA? Can extend approach to multiple groups: Bayesian ANOVA

FWD reference to hierarchical models for group comparison covered in Section 5.5

Exercises#

Exercise 1: small samples#

As = [5.77, 5.33, 4.59, 4.33, 3.66, 4.48]
Bs = [3.88, 3.55, 3.29, 2.59, 2.33, 3.59]
groups = ["A"]*len(As) + ["B"]*len(Bs)
df1 = pd.DataFrame({"group": groups, "vals": As + Bs})
# df1

Exercise 2: lecture and debate curriculums#

students = pd.read_csv("../datasets/students.csv")
# students.groupby("curriculum")["score"].describe()
formula_std = bmb.Formula("score ~ 0 + curriculum",
                          "sigma ~ 0 + curriculum")
priors_std = {
    "curriculum": bmb.Prior("Normal", mu=70, sigma=30),
    "sigma": {
        "curriculum":bmb.Prior("Normal", mu=1, sigma=2)
    },
    "nu": bmb.Prior("Gamma", alpha=2, beta=0.1),
}
mod_std = bmb.Model(formula=formula_std,
                 family="t",
                 link="identity",
                 priors=priors_std,
                 data=students)
mod_std
# mod_std.build()
# mod_std.graph()
       Formula: score ~ 0 + curriculum
                sigma ~ 0 + curriculum
        Family: t
          Link: mu = identity
                sigma = log
  Observations: 15
        Priors: 
    target = mu
        Common-level effects
            curriculum ~ Normal(mu: 70.0, sigma: 30.0)
        
        Auxiliary parameters
            nu ~ Gamma(alpha: 2.0, beta: 0.1)
    target = sigma
        Common-level effects
            sigma_curriculum ~ Normal(mu: 1.0, sigma: 2.0)
idata_std = mod_std.fit()
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [nu, curriculum, sigma_curriculum]

Sampling 2 chains for 1_000 tune and 1_000 draw iterations (2_000 + 2_000 draws total) took 2 seconds.
We recommend running at least 4 chains for robust computation of convergence diagnostics
# Calculate the difference between between means
currs = idata_std["posterior"]["curriculum"]
mu_debate = currs.sel(curriculum_dim="debate")
mu_lecture = currs.sel(curriculum_dim="lecture")
idata_std["posterior"]["dmeans"] = mu_debate - mu_lecture
az.summary(idata_std, kind="stats", var_names=["dmeans"], hdi_prob=0.95)
mean sd hdi_2.5% hdi_97.5%
dmeans 7.353 5.289 -2.038 18.714
# from ministats import plot_dmeans_stats
# plot_dmeans_stats(mod_std, idata_std, ppc_xlims=[50,100], group_name="curriculum");
# from scipy.stats import ttest_ind
# scoresD = students[students["curriculum"]=="debate"]["score"]
# scoresL = students[students["curriculum"]=="lecture"]["score"]
# res_std = ttest_ind(scoresD, scoresL, equal_var=False)
# res_std.statistic, res_std.pvalue
# from ministats import cohend2
# cohend2(scoresL, scoresD)

Exercise 3: redo exercises from Section 3.5 section using Bayesian methods#

# TODO

BONUS Examples#

Example 4: small example form BEST vignette#

See http://cran.nexr.com/web/packages/BEST/vignettes/BEST.pdf#page=2

y1s = [5.77, 5.33, 4.59, 4.33, 3.66, 4.48]
y2s = [3.88, 3.55, 3.29, 2.59, 2.33, 3.59]

from ministats.bayes import bayes_dmeans
mod4, idata4 = bayes_dmeans(y1s, y2s, groups=["y1", "y2"])
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [nu, Intercept, group, sigma_group]

Sampling 2 chains for 1_000 tune and 2_000 draw iterations (2_000 + 4_000 draws total) took 3 seconds.
We recommend running at least 4 chains for robust computation of convergence diagnostics
from ministats import calc_dmeans_stats
calc_dmeans_stats(idata4)
az.summary(idata4, kind="stats", hdi_prob=0.95,
           var_names=["dmeans", "sigma_y1", "sigma_y2", "dstd", "nu", "cohend"])
mean sd hdi_2.5% hdi_97.5%
dmeans -1.444 0.509 -2.456 -0.416
sigma_y1 0.865 0.356 0.364 1.566
sigma_y2 0.715 0.272 0.312 1.255
dstd -0.150 0.442 -1.134 0.653
nu 20.600 13.699 1.624 47.766
cohend -1.913 0.795 -3.451 -0.352
# from ministats import plot_dmeans_stats
# plot_dmeans_stats(mod4, idata4, ppc_xlims=None);

Example 5: comparing morning to evening#

treszkai/best

morning = [8.99, 9.21, 9.03, 9.15, 8.68, 8.82, 8.66, 8.82, 8.59, 8.14,
           9.09, 8.80, 8.18, 9.23, 8.55, 9.03, 9.36, 9.06, 9.57, 8.38]
evening = [9.82, 9.34, 9.73, 9.93, 9.33, 9.41, 9.48, 9.14, 8.62, 8.60,
           9.60, 9.41, 8.43, 9.77, 8.96, 9.81, 9.75, 9.50, 9.90, 9.13]
from ministats.bayes import bayes_dmeans
mod5, idata5 = bayes_dmeans(evening, morning, groups=["evening", "morning"])
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [nu, Intercept, group, sigma_group]

Sampling 2 chains for 1_000 tune and 2_000 draw iterations (2_000 + 4_000 draws total) took 3 seconds.
We recommend running at least 4 chains for robust computation of convergence diagnostics
from ministats import calc_dmeans_stats
calc_dmeans_stats(idata5)
az.summary(idata5, kind="stats", hdi_prob=0.95,
           var_names=["dmeans", "sigma_evening", "sigma_morning", "dstd", "nu", "cohend"])
mean sd hdi_2.5% hdi_97.5%
dmeans -0.528 0.142 -0.820 -0.264
sigma_evening 0.456 0.088 0.304 0.635
sigma_morning 0.386 0.072 0.259 0.526
dstd -0.070 0.112 -0.304 0.143
nu 22.786 14.200 2.670 50.157
cohend -1.267 0.380 -1.965 -0.479
# from ministats import plot_dmeans_stats
# plot_dmeans_stats(mod5, idata5, ppc_xlims=None);