Section 2.2 — Multiple random variables#

This notebook contains all the code examples from Section 2.2 Multiple random variables in the No Bullshit Guide to Statistics.

Notebook setup#

# load Python modules
import numpy as np               # numerical calculations
import matplotlib.pyplot as plt  # generic plotting functions
import seaborn as sns            # plotting distributions
# Figures setup
sns.set_theme(
    context="paper",
    style="whitegrid",
    palette="colorblind",
    rc={'figure.figsize': (7,4)},
)
%config InlineBackend.figure_format = 'retina'
# set random seed for repeatability
np.random.seed(42)

Definitions#

Example 1: two coin tosses#

def fC(c):
    if c in {"heads", "tails"}:
        return 1/2
    else:
        return 0

def fC1C2(c1,c2):
    return fC(c1)*fC(c2)

xs, ys, fXYs = [], [], []
map_int_to_label = {0: "heads", 1: "tails"}
for x in range(0,1+1):
    for y in range(0,1+1):
        xs.append(x)
        ys.append(y)
        fXYxy = fC1C2(map_int_to_label[x], map_int_to_label[y])
        fXYs.append(fXYxy)

fig, ax = plt.subplots(subplot_kw=dict(projection='3d'))
ax.stem(xs, ys, fXYs, basefmt=" ")
ax.set_xticks([0,1])
ax.set_xticklabels(["heads", "tails"])
ax.set_yticks([0,1])
ax.set_yticklabels(["heads", "tails"])
ax.set_xlabel("$c_1$")
ax.set_ylabel("$c_2$")
ax.set_zlabel("$f_{C_1C_2}$")
ax.set_zlim([0, 0.3]);
../_images/a802e98e729a66fefe0f59aa8ae7e5909c27ec27be30844c6019cc1c11640bbd.png

Example 2: rolling two dice#

def fD(d):
    if d in {1,2,3,4,5,6}:
        return 1/6
    else:
        return 0

def fD1D2(d1,d2):
    return fD(d1)*fD(d2)

xs, ys, fXYs = [], [], []
for x in range(1,6+1):
    for y in range(1,6+1):
        xs.append(x)
        ys.append(y)
        fXYxy = fD1D2(x,y)
        fXYs.append(fXYxy)

fig, ax = plt.subplots(subplot_kw=dict(projection='3d'))
ax.stem(xs, ys, fXYs, basefmt=" ")
ax.set_xlabel("$d_1$")
ax.set_ylabel("$d_2$")
ax.set_zlabel("$f_{D_1D_2}$")
ax.set_zlim([0, 0.05]);
../_images/997278cc5d12673567157984acf12f8e57735d417463ce55d855a500f5e41c57.png
# PLOT D_1 + D_2 = 7
xs, ys, fXYs = [], [], []
for x in range(1,6+1):
    y = 7 - x
    xs.append(x)
    ys.append(y)
    fXYxy = fD1D2(x,y)
    fXYs.append(fXYxy)

fig, ax = plt.subplots(subplot_kw=dict(projection='3d'))
ax.stem(xs, ys, fXYs, basefmt=" ")
ax.set_xlabel("$d_1$")
ax.set_ylabel("$d_2$")
ax.set_zlabel("$f_{D_1D_2}$")
ax.set_zlim([0, 0.05]);
../_images/ccd32fdf5b519e958f8ff697b761da1ee6ed57629c3f835021bffb6bb5e570cd.png

Example 3: coin-dependent dice roll#

def fC(c):
    if c in {"heads", "tails"}:
        return 1/2
    else:
        return 0

def fD(d):
    if d in {1,2,3,4,5,6}:
        return 1/6
    else:
        return 0

def fD4(d):
    if d in {1,2,3,4}:
        return 1/4
    else:
        return 0

def fCD(c,d):
    if c == "heads":
        return fC(c)*fD(d)
    elif c == "tails":
        return fC(c)*fD4(d)


xs, ys, fXYs = [], [], []
map_int_to_label = {0: "heads", 1: "tails"}
for x in range(0,1+1):
    for y in range(1,6+1):
        xs.append(x)
        ys.append(y)
        fXYxy = fCD(map_int_to_label[x],y)
        fXYs.append(fXYxy)

fig, ax = plt.subplots(subplot_kw=dict(projection='3d'))
ax.stem(xs, ys, fXYs, basefmt=" ")
ax.set_xticks([0,1])
ax.set_xticklabels(["heads", "tails"])
ax.set_xlabel("$c$")
ax.set_ylabel("$d$")
ax.set_zlabel("$f_{CD}$")
ax.set_zlim([0, 0.2]);
../_images/daf6b440ece6be27f9d5717d23ef82c5efd86da06454f9084b8b4821f2063e64.png
fXYs
[0.08333333333333333,
 0.08333333333333333,
 0.08333333333333333,
 0.08333333333333333,
 0.08333333333333333,
 0.08333333333333333,
 0.125,
 0.125,
 0.125,
 0.125,
 0.0,
 0.0]
sum(fXYs)
1.0
# f_{Y|X}(y|heads)  [as a list]
[fCD("heads",d) for d in range(1,6+1)]
[0.08333333333333333,
 0.08333333333333333,
 0.08333333333333333,
 0.08333333333333333,
 0.08333333333333333,
 0.08333333333333333]
# f_{Y|X}(y|tails)  [as a list]
[fCD("tails",d) for d in range(1,6+1)]
[0.125, 0.125, 0.125, 0.125, 0.0, 0.0]
# f_Y(y)    [as a list]
fYs = [fCD("heads",d)+fCD("tails",d) for d in range(1,6+1)]
fYs
[0.20833333333333331,
 0.20833333333333331,
 0.20833333333333331,
 0.20833333333333331,
 0.08333333333333333,
 0.08333333333333333]
# # ALT. compute f_{Y|X}(y|heads)*0.5 + f_{Y|X}(y|tails)*0.5
# fYs = [fD(d)*0.5+fD4(d)*0.5 for d in range(1,6+1)]
# fYs
sum(fYs)
1.0

Example 4: diagnostic test#

def fV(v):
    if v == 1:
        return 0.03
    elif v == 0:
        return 0.97

def fTgivenV1(t):
    if t == 1:
        return 0.90
    elif t == 0:
        return 0.10

def fTgivenV0(t):
    if t == 1:
        return 0.20
    elif t == 0:
        return 0.80

def fTgivenV(t,v):
    if v == 1:
        return fTgivenV1(t)
    elif v == 0:
        return fTgivenV0(t)

def fVT(v,t):
    return fV(v)*fTgivenV(t,v)
vs, ts, fVTs = [], [], []
for v in range(0,1+1):
    for t in range(0,1+1):
        vs.append(v)
        ts.append(t)
        fVTvt = fVT(v,t)
        fVTs.append(fVTvt)

fig, ax = plt.subplots(subplot_kw=dict(projection='3d'))
ax.stem(vs, ts, fVTs, basefmt=" ", bottom=0.0)
ax.set_xticks([0,1])
ax.set_xlabel("$v$")
ax.set_yticks([0,1])
ax.set_ylabel("$t$")
ax.set_zlabel("$f_{VT}$")
ax.set_zlim([0, 0.9]);
../_images/c6289d31bc4e5ec28b04b7f43fcf9f0308cef7cb7ef77bffea5c03d8e9d958b2.png
# f_T(0) = 
0.8* 0.97 + 0.10 *.03
0.779
# f_T(1) = 
0.2* 0.97 + 0.90 *.03
0.221
for t in range(0,1+1):
    fVTt = fVT(0,t) + fVT(1,t)
    print("Pr({T="+str(t)+"}) =", fVTt)
Pr({T=0}) = 0.779
Pr({T=1}) = 0.221
for v in range(0,1+1):
    for t in range(0,1+1):
        fVTvt = fVT(v,t)
        print("Pr({V="+str(v) + ",T="+str(t)+"}) =",
              fTgivenV(t,v), "*", fV(v), "=", fVTvt)
Pr({V=0,T=0}) = 0.8 * 0.97 = 0.776
Pr({V=0,T=1}) = 0.2 * 0.97 = 0.194
Pr({V=1,T=0}) = 0.1 * 0.03 = 0.003
Pr({V=1,T=1}) = 0.9 * 0.03 = 0.027

Example 4: diagnostic test (continued)#

We’re now interested in \(f_{V|T}(v|t)\), which we can obtain using Bayes’ theorem.

\[ f_{V|T}(v|t) = \frac{ f_{T|V}(t|v) f_V(v) }{ f_{T|V}(t|0) f_V(0) + f_{T|V}(t|1) f_V(1)}. \]

We’ll now define the function fVgivenT that computes the numerator and denominator in this fraction, and returns the ratio.

def fVgivenT(v,t):
    num = fTgivenV(t,v) * fV(v)
    denom = fTgivenV(t,0) * fV(0) + fTgivenV(t,1) * fV(1)
    return num/denom

The probability of the patient having the virus \(f_{V|T}(1|t=1)\), given they tested positive is:

fVgivenT(1,1)
0.12217194570135746

Intuitively, this percentage is measuring the ratio of the probability of the outcome \(f_{VT}(1,1)\) divided by sum of the probabilities of all outcomes where positive test can occur: \(f_{VT}(1,1)+f_{VT}(0,1)\).

fVT(1,1) / (fVT(0,1) + fVT(1,1))
0.12217194570135746

Define datasets for the examples#

Example 1: Multivariable uniform#

xmin = 100
xmax = 200
ymin = 10
ymax = 20

# joint pdf of = randint(100, 200) x randint(10,20)
def fU(x,y):
    A = (xmax-xmin) * (ymax-ymin)
    if xmin <= x and x <= xmax and ymin <= y and y <= ymax:
        return 1/A
    else:
        return 0.0
fU(170,12)
0.001