# Section 2.2 — Multiple random variables#

This notebook contains all the code examples from Section 2.2 Multiple random variables in the No Bullshit Guide to Statistics.

## Notebook setup#

# load Python modules
import numpy as np               # numerical calculations
import matplotlib.pyplot as plt  # generic plotting functions
import seaborn as sns            # plotting distributions

# Figures setup
sns.set_theme(
context="paper",
style="whitegrid",
palette="colorblind",
rc={'figure.figsize': (7,4)},
)
%config InlineBackend.figure_format = 'retina'

# set random seed for repeatability
np.random.seed(42)


## Definitions#

### Example 1: two coin tosses#

def fC(c):
return 1/2
else:
return 0

def fC1C2(c1,c2):
return fC(c1)*fC(c2)

xs, ys, fXYs = [], [], []
map_int_to_label = {0: "heads", 1: "tails"}
for x in range(0,1+1):
for y in range(0,1+1):
xs.append(x)
ys.append(y)
fXYxy = fC1C2(map_int_to_label[x], map_int_to_label[y])
fXYs.append(fXYxy)

fig, ax = plt.subplots(subplot_kw=dict(projection='3d'))
ax.stem(xs, ys, fXYs, basefmt=" ")
ax.set_xticks([0,1])
ax.set_yticks([0,1])
ax.set_xlabel("$c_1$")
ax.set_ylabel("$c_2$")
ax.set_zlabel("$f_{C_1C_2}$")
ax.set_zlim([0, 0.3]);


### Example 2: rolling two dice#

def fD(d):
if d in {1,2,3,4,5,6}:
return 1/6
else:
return 0

def fD1D2(d1,d2):
return fD(d1)*fD(d2)

xs, ys, fXYs = [], [], []
for x in range(1,6+1):
for y in range(1,6+1):
xs.append(x)
ys.append(y)
fXYxy = fD1D2(x,y)
fXYs.append(fXYxy)

fig, ax = plt.subplots(subplot_kw=dict(projection='3d'))
ax.stem(xs, ys, fXYs, basefmt=" ")
ax.set_xlabel("$d_1$")
ax.set_ylabel("$d_2$")
ax.set_zlabel("$f_{D_1D_2}$")
ax.set_zlim([0, 0.05]);

# PLOT D_1 + D_2 = 7
xs, ys, fXYs = [], [], []
for x in range(1,6+1):
y = 7 - x
xs.append(x)
ys.append(y)
fXYxy = fD1D2(x,y)
fXYs.append(fXYxy)

fig, ax = plt.subplots(subplot_kw=dict(projection='3d'))
ax.stem(xs, ys, fXYs, basefmt=" ")
ax.set_xlabel("$d_1$")
ax.set_ylabel("$d_2$")
ax.set_zlabel("$f_{D_1D_2}$")
ax.set_zlim([0, 0.05]);


### Example 3: coin-dependent dice roll#

def fC(c):
return 1/2
else:
return 0

def fD(d):
if d in {1,2,3,4,5,6}:
return 1/6
else:
return 0

def fD4(d):
if d in {1,2,3,4}:
return 1/4
else:
return 0

def fCD(c,d):
return fC(c)*fD(d)
elif c == "tails":
return fC(c)*fD4(d)

xs, ys, fXYs = [], [], []
map_int_to_label = {0: "heads", 1: "tails"}
for x in range(0,1+1):
for y in range(1,6+1):
xs.append(x)
ys.append(y)
fXYxy = fCD(map_int_to_label[x],y)
fXYs.append(fXYxy)

fig, ax = plt.subplots(subplot_kw=dict(projection='3d'))
ax.stem(xs, ys, fXYs, basefmt=" ")
ax.set_xticks([0,1])
ax.set_xlabel("$c$")
ax.set_ylabel("$d$")
ax.set_zlabel("$f_{CD}$")
ax.set_zlim([0, 0.2]);

fXYs

[0.08333333333333333,
0.08333333333333333,
0.08333333333333333,
0.08333333333333333,
0.08333333333333333,
0.08333333333333333,
0.125,
0.125,
0.125,
0.125,
0.0,
0.0]

sum(fXYs)

1.0

# f_{Y|X}(y|heads)  [as a list]

[0.08333333333333333,
0.08333333333333333,
0.08333333333333333,
0.08333333333333333,
0.08333333333333333,
0.08333333333333333]

# f_{Y|X}(y|tails)  [as a list]
[fCD("tails",d) for d in range(1,6+1)]

[0.125, 0.125, 0.125, 0.125, 0.0, 0.0]

# f_Y(y)    [as a list]
fYs = [fCD("heads",d)+fCD("tails",d) for d in range(1,6+1)]
fYs

[0.20833333333333331,
0.20833333333333331,
0.20833333333333331,
0.20833333333333331,
0.08333333333333333,
0.08333333333333333]

# # ALT. compute f_{Y|X}(y|heads)*0.5 + f_{Y|X}(y|tails)*0.5
# fYs = [fD(d)*0.5+fD4(d)*0.5 for d in range(1,6+1)]
# fYs

sum(fYs)

1.0


### Example 4: diagnostic test#

def fV(v):
if v == 1:
return 0.03
elif v == 0:
return 0.97

def fTgivenV1(t):
if t == 1:
return 0.90
elif t == 0:
return 0.10

def fTgivenV0(t):
if t == 1:
return 0.20
elif t == 0:
return 0.80

def fTgivenV(t,v):
if v == 1:
return fTgivenV1(t)
elif v == 0:
return fTgivenV0(t)

def fVT(v,t):
return fV(v)*fTgivenV(t,v)

vs, ts, fVTs = [], [], []
for v in range(0,1+1):
for t in range(0,1+1):
vs.append(v)
ts.append(t)
fVTvt = fVT(v,t)
fVTs.append(fVTvt)

fig, ax = plt.subplots(subplot_kw=dict(projection='3d'))
ax.stem(vs, ts, fVTs, basefmt=" ", bottom=0.0)
ax.set_xticks([0,1])
ax.set_xlabel("$v$")
ax.set_yticks([0,1])
ax.set_ylabel("$t$")
ax.set_zlabel("$f_{VT}$")
ax.set_zlim([0, 0.9]);

# f_T(0) =
0.8* 0.97 + 0.10 *.03

0.779

# f_T(1) =
0.2* 0.97 + 0.90 *.03

0.221

for t in range(0,1+1):
fVTt = fVT(0,t) + fVT(1,t)
print("Pr({T="+str(t)+"}) =", fVTt)

Pr({T=0}) = 0.779
Pr({T=1}) = 0.221

for v in range(0,1+1):
for t in range(0,1+1):
fVTvt = fVT(v,t)
print("Pr({V="+str(v) + ",T="+str(t)+"}) =",
fTgivenV(t,v), "*", fV(v), "=", fVTvt)

Pr({V=0,T=0}) = 0.8 * 0.97 = 0.776
Pr({V=0,T=1}) = 0.2 * 0.97 = 0.194
Pr({V=1,T=0}) = 0.1 * 0.03 = 0.003
Pr({V=1,T=1}) = 0.9 * 0.03 = 0.027


### Example 4: diagnostic test (continued)#

We’re now interested in $$f_{V|T}(v|t)$$, which we can obtain using Bayes’ theorem.

$f_{V|T}(v|t) = \frac{ f_{T|V}(t|v) f_V(v) }{ f_{T|V}(t|0) f_V(0) + f_{T|V}(t|1) f_V(1)}.$

We’ll now define the function fVgivenT that computes the numerator and denominator in this fraction, and returns the ratio.

def fVgivenT(v,t):
num = fTgivenV(t,v) * fV(v)
denom = fTgivenV(t,0) * fV(0) + fTgivenV(t,1) * fV(1)
return num/denom


The probability of the patient having the virus $$f_{V|T}(1|t=1)$$, given they tested positive is:

fVgivenT(1,1)

0.12217194570135746


Intuitively, this percentage is measuring the ratio of the probability of the outcome $$f_{VT}(1,1)$$ divided by sum of the probabilities of all outcomes where positive test can occur: $$f_{VT}(1,1)+f_{VT}(0,1)$$.

fVT(1,1) / (fVT(0,1) + fVT(1,1))

0.12217194570135746


## Define datasets for the examples#

### Example 1: Multivariable uniform#

xmin = 100
xmax = 200
ymin = 10
ymax = 20

# joint pdf of = randint(100, 200) x randint(10,20)
def fU(x,y):
A = (xmax-xmin) * (ymax-ymin)
if xmin <= x and x <= xmax and ymin <= y and y <= ymax:
return 1/A
else:
return 0.0

fU(170,12)

0.001