Section 4.3 — Interpreting linear models#

This notebook contains the code examples from Section 4.3 Interpreting linear models from the No Bullshit Guide to Statistics.

Notebook setup#

# load Python modules
import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Figures setup
plt.clf()  # needed otherwise sns.set_theme doesn't work
from plot_helpers import RCPARAMS
# RCPARAMS.update({'figure.figsize': (8, 5)})   # good for screen
RCPARAMS.update({'figure.figsize': (5, 3)})  # good for print
# RCPARAMS.update({'text.latex.preamble': r'\usepackage{amsmath}'})
# RCPARAMS.update({'text.usetex': True})
sns.set_theme(
context="paper",
style="whitegrid",
palette="colorblind",
rc=RCPARAMS,
)

%config InlineBackend.figure_format = 'retina'

# Where to store figures
DESTDIR = "figures/lm/interpreting"

<Figure size 640x480 with 0 Axes>

# set random seed for repeatability
np.random.seed(42)

import statsmodels.formula.api as smf
import statsmodels.api as sm
import statsmodels.stats.api as sms


Introduction#

# load the dataset
n = doctors.shape[0]

# fit the model
formula = "score ~ 1 + alc + weed + exrc"
lm2 = smf.ols(formula, data=doctors).fit()

# model degrees of freedom (number of predictors)
p = lm2.df_model

# display the summary table
lm2.summary()

Dep. Variable: R-squared: score 0.842 OLS 0.839 Least Squares 270.3 Fri, 19 Jul 2024 1.05e-60 14:52:14 -547.63 156 1103. 152 1115. 3 nonrobust
coef std err t P>|t| [0.025 0.975] 60.4529 1.289 46.885 0.000 57.905 63.000 -1.8001 0.070 -25.726 0.000 -1.938 -1.662 -1.0216 0.476 -2.145 0.034 -1.962 -0.081 1.7683 0.138 12.809 0.000 1.496 2.041
 Omnibus: Durbin-Watson: 1.14 1.828 0.565 0.9 0.182 0.638 3.075 31.2

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

Model fit metrics#

Coefficient of determination#

lm2.rsquared

0.8421649167873537

# ALT.
1 - lm2.ssr/lm2.centered_tss

0.8421649167873537


lm2.rsquared_adj

0.8390497506713147

# ALT.
1 - (lm2.ssr/(n-p-1)) / (lm2.centered_tss/(n-1))

0.8390497506713147


F-statistic and associated p-value#

lm2.fvalue, lm2.f_pvalue

(270.34350189265837, 1.0512133413866766e-60)


Log-likelihood#

lm2.llf

-547.6259042117637

# ALT.
from scipy.stats import norm
sigmahat = np.sqrt(lm2.scale)
np.sum(np.log(norm.pdf(lm2.resid,scale=sigmahat)))
# Q: Why not exactly the same as lm2.llf?

-547.6519921512181

# How lm2.llf is computed:
SSR = np.sum(lm2.resid**2)
nobs2 = n/2.0
llf = -np.log(SSR)*nobs2                # concentrated likelihood
llf -= ( 1+np.log(np.pi/nobs2) )*nobs2  # with likelihood constant
llf

-547.6259042117637


Information criteria#

lm2.aic, lm2.bic

(1103.2518084235273, 1115.4512324525256)

# ALT.
2*(p+1) - 2*lm2.llf,  np.log(n)*(p+1) - 2*lm2.llf

(1103.2518084235273, 1115.4512324525256)


Other model-fit metrics#

The mean squared error (MSE)

from statsmodels.tools.eval_measures import mse

scores = doctors["score"]
scorehats = lm2.fittedvalues
mse(scores,scorehats), lm2.ssr/n

(65.56013803717558, 65.56013803717558)


Parameter estimates#

# estimated parameters
lm2.params

Intercept    60.452901
alc          -1.800101
weed         -1.021552
exrc          1.768289
dtype: float64

# estimated sigma (std. of error term)
sigmahat = np.sqrt(lm2.scale)
sigmahat

8.202768119825622

# ALT.
np.sqrt(lm2.ssr/(n-p-1))

8.202768119825622


Confidence intervals for model parameters#

# standard errors
lm2.bse

Intercept    1.289380
alc          0.069973
weed         0.476166
exrc         0.138056
dtype: float64

lm2.conf_int(alpha=0.05)

0 1
Intercept 57.905480 63.000321
alc -1.938347 -1.661856
weed -1.962309 -0.080794
exrc 1.495533 2.041044

Hypothesis testing for linear models#

T-tests for individual parameters#

Hypothesis testing for slope coefficient

Is there a non-zero slope coefficient?

Example: t-test for the variable weed#

• Null hypothesis $$H_0$$: weed has no effect on score, which is equivalent to $$\beta_{\texttt{weed}} = 0$$:

$H_0: \quad S \;\sim\; \mathcal{N}( {\color{red} \beta_0 + \beta_{\texttt{alc}}\!\cdot\!\textrm{alc} + \beta_{\texttt{exrc}}\!\cdot\!\textrm{exrc} }, \ \sigma) \qquad \qquad$
• Alternative hypothesis $$H_A$$: weed has an effect on score, and the slope is not zero, $$\beta_{\texttt{weed}} \neq 0$$:

$H_A: \quad S \;\sim\; \mathcal{N}\left( {\color{blue} \beta_0 + \beta_{\texttt{alc}}\!\cdot\!\textrm{alc} + \beta_{\texttt{weed}}\!\cdot\!\textrm{weed} + \beta_{\texttt{exrc}}\!\cdot\!\textrm{exrc} }, \ \sigma \right)$
lm2.tvalues["weed"], lm2.pvalues["weed"], n-p-1

(-2.1453705454368617, 0.03351156181342315, 152.0)


Calculate the $$t$$ statistic:

obst_weed = (lm2.params["weed"] - 0) / lm2.bse["weed"]
obst_weed  # = lm2.tvalues["weed"]

-2.1453705454368617


Calculate the associated $$p$$-value

from scipy.stats import t as tdist

pleft = tdist(df=n-p-1).cdf(obst_weed)
pright = 1 - tdist(df=lm2.df_resid).cdf(obst_weed)
pvalue_weed = 2 * min(pleft, pright)
pvalue_weed  # = lm2.pvalues["weed"]

0.03351156181342315

lm2.tvalues

Intercept    46.885245
alc         -25.725654
weed         -2.145371
exrc         12.808529
dtype: float64

lm2.df_resid, n-p-1

(152.0, 152.0)

lm2.pvalues

Intercept    2.756807e-92
alc          2.985013e-57
weed         3.351156e-02
exrc         6.136296e-26
dtype: float64


F-test for the overall model#

lm2.fvalue

270.34350189265837

# ALT.
F = lm2.mse_model / lm2.mse_resid
F

270.34350189265837

lm2.f_pvalue

1.0512133413866766e-60

# ALT.
from scipy.stats import f as fdist
fdist(dfn=p, dfd=n-p-1).sf(F)

1.0512133413866766e-60


F-test for a submodel#

formula2nw = "score ~ 1 + alc + exrc"
lm2_noweed = smf.ols(formula2nw, data=doctors).fit()
F_noweed, p_noweed, _ = lm2.compare_f_test(lm2_noweed)
F_noweed, p_noweed

(4.602614777228039, 0.033511561813423456)


The F-statistic is the same as the t-statistic#

F_noweed, obst_weed**2, lm2.tvalues["weed"]**2

(4.602614777228039, 4.602614777228057, 4.602614777228057)

p_noweed, pvalue_weed, lm2.pvalues["weed"]

(0.033511561813423456, 0.03351156181342315, 0.03351156181342315)


Assumptions checks and diagnostics#

Are the assumptions for the linear model satisfied?

Residuals plots#

from ministats import plot_resid
plot_resid(lm2, lowess=True);

from ministats import plot_resid
plot_resid(lm2, pred="alc", lowess=True);


Linearity checks#

Look at residuals plots.

Independence checks#

Look at residuals plots.

Normality checks#

Look at residuals plots…

QQ plot inspection#

from statsmodels.graphics.api import qqplot
qqplot(lm2.resid, line="s");

# BONUS
ax = sns.histplot(lm2.resid)
ax.set_xlim([-27,27]);


Skew and kurtosis#

from scipy.stats import skew
from scipy.stats import kurtosis

skew(lm2.resid), kurtosis(lm2.resid, fisher=False)

(0.18224568453683593, 3.074795238556266)


Homoscedasticity checks#

The scale-location plot allows us to see if pattern in the variance of the residuals exists.

from ministats import plot_scaleloc

plot_scaleloc(lm2);

# ALT. Generate the scale-location plot from scratch

# 1. Compute the square root of the standardized residuals
scorehats = lm2.fittedvalues
std_resids = lm2.resid / np.sqrt(lm2.scale)
sqrt_std_resids = np.sqrt(np.abs(std_resids))

# 2. Generate the scatter plot
ax = sns.scatterplot(x=scorehats, y=sqrt_std_resids)

# 3. Add the LOWESS curve
from statsmodels.nonparametric.smoothers_lowess import lowess
xgrid, ylowess = lowess(sqrt_std_resids, scorehats).T
sns.lineplot(x=xgrid, y=ylowess, ax=ax)

ax.set_ylabel(r"$\sqrt{|standardized\ residuals|}$")
ax.set_xlabel("fitted values");


Collinearity checks#

Check the correlation matrix#

corrM = doctors[["alc", "weed", "exrc"]].corr()
corrM.round(4)

alc weed exrc
alc 1.0000 0.0422 0.0336
weed 0.0422 1.0000 0.0952
exrc 0.0336 0.0952 1.0000
# # BONUS 1 (not covered anywhere earlier in the book)
# sns.heatmap(corrM, annot=True, fmt='.2f', cmap="Grays");

# # BONUS 2 (not covered anywhere earlier in the book)
# sns.pairplot(doctors[["alc", "weed", "exrc"]]);


Condition number#

lm2.condition_number

31.229721453770164

# ALT. (compute from scratch)
#######################################################
eigvals = np.linalg.eigvals(X.T @ X)
lam_max, lam_min = np.max(eigvals), np.min(eigvals)
np.sqrt(lam_max/lam_min)

31.229721453770264


Variance inflation factor#

from ministats import calc_lm_vif

calc_lm_vif(lm2, "alc"), calc_lm_vif(lm2, "weed"), calc_lm_vif(lm2, "exrc")

(1.0026692156304757, 1.010703145007322, 1.010048809433661)

# ALT. using variance_inflation_factor from statsmodels
from statsmodels.stats.outliers_influence \
import variance_inflation_factor as statsmodels_vif

statsmodels_vif(X, 1)  # variation inflation factor of "alc" in lm2

1.0026692156304757


Perfect multicollinearity example#

doctorscol = doctors.copy()
doctorscol["alc2"] = doctors["alc"]
formula2col = "score ~ 1 + alc + alc2 + weed + exrc"
lm2col = smf.ols(formula2col, data=doctorscol).fit()
lm2col.params

Intercept    60.452901
alc          -0.900051
alc2         -0.900051
weed         -1.021552
exrc          1.768289
dtype: float64

calc_lm_vif(lm2col, "alc")

/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/ministats/linear_models.py:17: RuntimeWarning: divide by zero encountered in scalar divide
vif = 1. / (1. - r_squared_i)

inf


Outliers and influential observations#

TODO: definitions

Leverage and influence metrics#

TODO: add definitions of quantities in point form

Leverage plots#

from statsmodels.graphics.api import plot_leverage_resid2

plot_leverage_resid2(lm2);


Influence plots#

from statsmodels.graphics.api import influence_plot

influence_plot(lm2, criterion="cooks", size=4);


Model predictions#

Prediction on the dataset#

lm2.fittedvalues

0      55.345142
1      32.408174
2      71.030821
3      56.789073
4      57.514154
...
151    58.867105
152    43.049719
153    57.728460
154    69.617361
155    78.135788
Length: 156, dtype: float64


Prediction for new data#

newdoc = {"alc":3, "weed":1, "exrc":8}
lm2.predict(newdoc)

0    68.177355
dtype: float64

newdoc_score_pred = lm2.get_prediction(newdoc)
newdoc_score_pred.conf_int(obs=True, alpha=0.1)

array([[54.50324518, 81.85146489]])


In-sample prediction accuracy#

# Compute the in-sample mean squared error (MSE)
scores = doctors['score']
scorehats = lm2.fittedvalues
mse = np.mean( (scores - scorehats)**2 )
mse

65.56013803717558

# ALT.
lm2.ssr/n

65.56013803717558

# # ALT2.
# from statsmodels.tools.eval_measures import mse
# mse(scores,scorehats)


TODO: explain

Leave-one-out cross-validation#

loo_preds = np.zeros(n)
for i in range(n):
doctors_no_i = doctors.drop(index=i)
lm2_no_i = smf.ols(formula,data=doctors_no_i).fit()
predictors_i = dict(doctors.loc[i,:])
pred_i = lm2_no_i.predict(predictors_i)[0]
loo_preds[i] = pred_i

# Calculate the out-of-sample mean squared error
mse_loo = np.mean( (doctors['score'] - loo_preds)**2 )
mse_loo

69.2005451471095


Compare with the in-sample MSE of the model which is lower.

lm2.ssr/n

65.56013803717558


Towards machine learning#

The out-of-sample prediction accuracy is a common metric used in machine learning (ML) tasks.

Explanations#

Adjusted $$R^2$$#

TODO: show formula

# TODO


Calculating standard error of parameters (optional)#

lm2.bse

Intercept    1.289380
alc          0.069973
weed         0.476166
exrc         0.138056
dtype: float64

# construct the design matrix for the model
# calculate the diagonal of the inverse-covariance matrix
inv_covs = np.diag(np.linalg.inv(X.T.dot(X)))
sigmahat*np.sqrt(inv_covs)

array([1.28938008, 0.06997301, 0.4761656 , 0.13805557])

# these lead to approx. same result because predictors are not correlated
# but the correct formula is to use the inv. covariance matrix as above.
sum_alc_dev2 = np.sum((doctors["alc"] - doctors["alc"].mean())**2)
sum_weed_dev2 = np.sum((doctors["weed"] - doctors["weed"].mean())**2)
sum_exrc_dev2 = np.sum((doctors["exrc"] - doctors["exrc"].mean())**2)
se_b_alc = sigmahat / np.sqrt(sum_alc_dev2)
se_b_weed = sigmahat / np.sqrt(sum_weed_dev2)
se_b_exrc = sigmahat / np.sqrt(sum_exrc_dev2)
se_b_alc, se_b_weed, se_b_exrc

(0.06987980522527787, 0.4736376432238543, 0.13736710473639843)


Metrics for influential observations#

Leverage score#

$h_i = 1 - \frac{r_i}{r_{i(i)}},$
infl = lm2.get_influence()
infl.hat_matrix_diag[0:5]

array([0.09502413, 0.0129673 , 0.01709783, 0.01465484, 0.00981632])


Studentized residuals#

$t_{i} = \frac{r_i}{\hat{\sigma}_{(i)} \sqrt{1-h_i}}$
infl.resid_studentized_external[0:5]

array([ 0.98085317, -2.03409243, -1.61072775, -0.21903275, -1.29094028])


Cook’s distance#

$D_i = \frac{r_i^2}{p \, \widehat{\sigma}^2} \cdot \frac{h_{i}}{(1-h_{i})^2} % = \frac{t_i^2 }{p}\frac{h_i}{1-h_i}$
infl.cooks_distance[0][0:5]

array([0.02526116, 0.01331454, 0.01116562, 0.00017951, 0.0041123 ])


DFFITS diagnostic#

$\textrm{DFFITS}_i = \frac{ \widehat{y}_i - \widehat{y}_{i(i)} }{ \widehat{\sigma}_{(i)} \sqrt{h_i} }.$
infl.dffits[0][0:5]

array([ 0.31783554, -0.23314692, -0.21244055, -0.02671194, -0.12853536])


Calculating leverage and influence metrics using statsmodels#

infl = lm2.get_influence()
infl_df = infl.summary_frame()
# list(infl_df.columns)

cols = ["student_resid", "hat_diag", "cooks_d", "dffits"]

student_resid hat_diag cooks_d dffits
0 0.981 0.095 0.025 0.318
1 -2.034 0.013 0.013 -0.233
2 -1.611 0.017 0.011 -0.212
3 -0.219 0.015 0.000 -0.027
4 -1.291 0.010 0.004 -0.129

Hypothesis tests for checking linear model assumptions#

lm2.diagn

{'jb': 0.8999138579592745,
'jbpv': 0.6376556155083202,
'skew': 0.18224568453683593,
'kurtosis': 3.074795238556266,
'omni': 1.1403999852814177,
'omnipv': 0.5654123490825862,
'condno': 31.229721453770164,
'mineigval': 40.14996367643263}


Hypothesis test for linearity#

sms.linear_harvey_collier(lm2)

TtestResult(statistic=1.4317316989777105, pvalue=0.15427366179829397, df=152)


Normality tests#

from statsmodels.stats.stattools import omni_normtest

omni, omnipv = omni_normtest(lm2.resid)
omni, omnipv

(1.1403999852814177, 0.5654123490825862)

from statsmodels.stats.stattools import jarque_bera

jb, jbpv, skew, kurtosis = jarque_bera(lm2.resid)
jb, jbpv, skew, kurtosis

(0.8999138579592745,
0.6376556155083202,
0.18224568453683593,
3.074795238556266)


Independence test (autocorrelation)#

from statsmodels.stats.stattools import durbin_watson

durbin_watson(lm2.resid)

1.8284830211766734


Homoskedasticity tests#

Breusch-Pagan#

https://en.wikipedia.org/wiki/Breusch-Pagan_test

lm, p_lm, _, _ = sms.het_breuschpagan(lm2.resid, lm2.model.exog)
lm, p_lm

(6.254809752854401, 0.09985027888796173)

Goldfeld-Quandt test#

https://en.wikipedia.org/wiki/Goldfeld-Quandt_test

# sort by score
idxs = doctors.sort_values("alc").index

# sort by scorehats
idxs = np.argsort(lm2.fittedvalues)
X = lm2.model.exog[idxs,:]
resid = lm2.resid[idxs]

# run the test
F, p_F, _ = sms.het_goldfeldquandt(resid, X, idx=1)
F, p_F

(1.5346515257083706, 0.03369473000286668)


Discussion#

Maximum likelihood estimate#

from scipy.optimize import minimize
from scipy.stats import norm

n = len(doctors)
preds = doctors[["alc", "weed", "exrc"]]
scores = doctors["score"]

def negloglikelihood(betas):
scorehats = betas[0] + (betas[1:]*preds).sum(1)
residuals = scores - scorehats
SSR = np.sum(residuals**2)
sigmahat = np.sqrt(SSR/(n-3-1))
likelihoods = norm.pdf(residuals,scale=sigmahat)
loglikelihood = np.sum(np.log(likelihoods))
return -1 * loglikelihood

betas = minimize(negloglikelihood, x0=[0,0,0,0]).x
betas

array([60.4528995 , -1.80010127, -1.02155223,  1.76828879])

lm2.params.values

array([60.45290059, -1.80010132, -1.02155166,  1.76828876])


Regularization#

doctors[["alc", "weed", "exrc"]].std()

alc     9.428506
weed    1.391068
exrc    4.796361
dtype: float64

# TRUE
# alc =  - 1.8
# weed = - 0.5
# exrc = + 1.9

# FITTED
lm2.params

Intercept    60.452901
alc          -1.800101
weed         -1.021552
exrc          1.768289
dtype: float64


L1 regularization (LASSO)#

Set alpha option to the desired value $$\alpha_1$$ and L1_wt = 1, which means $$100\%$$ L1 regularization.

lm2_L1reg = smf.ols(formula, data=doctors) \
.fit_regularized(alpha=0.3, L1_wt=1.0)
lm2_L1reg.params

Intercept    58.930367
alc          -1.763306
weed          0.000000
exrc          1.795228
dtype: float64


L2 regularization (ridge)#

Set alpha option to the desired value $$\alpha_2$$ and L1_wt = 0, which means $$0\%$$ L1 regularization and $$100\%$$ L2 regularization.

lm2_L2reg = smf.ols(formula, data=doctors) \
.fit_regularized(alpha=0.05, L1_wt=0.0001)
lm2_L2reg.params

Intercept    50.694960
alc          -1.472536
weed         -0.457425
exrc          2.323386
dtype: float64


Statsmodels diagnostics plots (BONUS TOPIC)#

TODO: import explanations from .tex

Plot fit against one regressor#

from statsmodels.graphics.api import plot_fit

with plt.rc_context({"figure.figsize":(9,2.6)}):
fig, (ax1,ax2,ax3) = plt.subplots(1,3, sharey=True)
plot_fit(lm2, "alc",  ax=ax1)
plot_fit(lm2, "weed", ax=ax2)
plot_fit(lm2, "exrc", ax=ax3)
ax2.get_legend().remove()
ax3.get_legend().remove()


Partial regression plot#

https://www.statsmodels.org/stable/generated/statsmodels.graphics.regressionplots.plot_partregress.html

from statsmodels.graphics.api import plot_partregress

with plt.rc_context({"figure.figsize":(9,2.6)}):
fig, (ax1,ax2,ax3) = plt.subplots(1,3, sharey=True)
plot_partregress("score", "alc",  exog_others=[], data=doctors, obs_labels=False, ax=ax1)
plot_partregress("score", "weed", exog_others=[], data=doctors, obs_labels=False, ax=ax2)
plot_partregress("score", "exrc", exog_others=[], data=doctors, obs_labels=False, ax=ax3)


CCPR plot#

component and component-plus-residual

from statsmodels.graphics.api import plot_ccpr

with plt.rc_context({"figure.figsize":(9,2.6), "text.usetex":True}):
fig, (ax1,ax2,ax3) = plt.subplots(1,3)

plot_ccpr(lm2, "alc", ax=ax1)
ax1.set_title("")
ax1.set_ylabel(r"$\texttt{alc} \cdot \widehat{\beta}_{\texttt{alc}} \; + \; \mathbf{r}$")

plot_ccpr(lm2, "weed", ax=ax2)
ax2.set_title("")
ax2.set_ylabel(r"$\texttt{weed} \cdot \widehat{\beta}_{\texttt{weed}} \; + \; \mathbf{r}$", labelpad=-3)

plot_ccpr(lm2, "exrc", ax=ax3)
ax3.set_title("")
ax3.set_ylabel(r"$\texttt{exrc} \cdot \widehat{\beta}_{\texttt{exrc}} \; + \; \mathbf{r}$", labelpad=-3)

Error in callback <function _draw_all_if_interactive at 0x7f3ceecf11f0> (for post_execute):

---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/matplotlib/texmanager.py:255, in TexManager._run_checked_subprocess(cls, command, tex, cwd)
254 try:
--> 255     report = subprocess.check_output(
256         command, cwd=cwd if cwd is not None else cls.texcache,
257         stderr=subprocess.STDOUT)
258 except FileNotFoundError as exc:

File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/subprocess.py:415, in check_output(timeout, *popenargs, **kwargs)
413     kwargs['input'] = empty
--> 415 return run(*popenargs, stdout=PIPE, timeout=timeout, check=True,
416            **kwargs).stdout

File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/subprocess.py:493, in run(input, capture_output, timeout, check, *popenargs, **kwargs)
491     kwargs['stderr'] = PIPE
--> 493 with Popen(*popenargs, **kwargs) as process:
494     try:

File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/subprocess.py:858, in Popen.__init__(self, args, bufsize, executable, stdin, stdout, stderr, preexec_fn, close_fds, shell, cwd, env, universal_newlines, startupinfo, creationflags, restore_signals, start_new_session, pass_fds, encoding, errors, text)
855             self.stderr = io.TextIOWrapper(self.stderr,
856                     encoding=encoding, errors=errors)
--> 858     self._execute_child(args, executable, preexec_fn, close_fds,
859                         pass_fds, cwd, env,
860                         startupinfo, creationflags, shell,
864                         restore_signals, start_new_session)
865 except:
866     # Cleanup if the child failed starting.

File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/subprocess.py:1720, in Popen._execute_child(self, args, executable, preexec_fn, close_fds, pass_fds, cwd, env, startupinfo, creationflags, shell, p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite, restore_signals, start_new_session)
1719         err_msg = os.strerror(errno_num)
-> 1720     raise child_exception_type(errno_num, err_msg, err_filename)
1721 raise child_exception_type(err_msg)

FileNotFoundError: [Errno 2] No such file or directory: 'latex'

The above exception was the direct cause of the following exception:

RuntimeError                              Traceback (most recent call last)
File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/matplotlib/pyplot.py:120, in _draw_all_if_interactive()
118 def _draw_all_if_interactive():
119     if matplotlib.is_interactive():
--> 120         draw_all()

File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/matplotlib/_pylab_helpers.py:132, in Gcf.draw_all(cls, force)
130 for manager in cls.get_all_fig_managers():
131     if force or manager.canvas.figure.stale:
--> 132         manager.canvas.draw_idle()

File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/matplotlib/backend_bases.py:2082, in FigureCanvasBase.draw_idle(self, *args, **kwargs)
2080 if not self._is_idle_drawing:
2081     with self._idle_draw_cntx():
-> 2082         self.draw(*args, **kwargs)

File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/matplotlib/backends/backend_agg.py:400, in FigureCanvasAgg.draw(self)
396 # Acquire a lock on the shared font cache.
397 with RendererAgg.lock, \
398      (self.toolbar._wait_cursor_for_draw_cm() if self.toolbar
399       else nullcontext()):
--> 400     self.figure.draw(self.renderer)
401     # A GUI class may be need to update a window using this draw, so
402     # don't forget to call the superclass.
403     super().draw()

File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/matplotlib/artist.py:95, in _finalize_rasterization.<locals>.draw_wrapper(artist, renderer, *args, **kwargs)
93 @wraps(draw)
94 def draw_wrapper(artist, renderer, *args, **kwargs):
---> 95     result = draw(artist, renderer, *args, **kwargs)
96     if renderer._rasterizing:
97         renderer.stop_rasterizing()

File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/matplotlib/artist.py:72, in allow_rasterization.<locals>.draw_wrapper(artist, renderer)
69     if artist.get_agg_filter() is not None:
70         renderer.start_filter()
---> 72     return draw(artist, renderer)
73 finally:
74     if artist.get_agg_filter() is not None:

File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/matplotlib/figure.py:3175, in Figure.draw(self, renderer)
3172         # ValueError can occur when resizing a window.
3174 self.patch.draw(renderer)
-> 3175 mimage._draw_list_compositing_images(
3176     renderer, self, artists, self.suppressComposite)
3178 for sfig in self.subfigs:
3179     sfig.draw(renderer)

File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/matplotlib/image.py:131, in _draw_list_compositing_images(renderer, parent, artists, suppress_composite)
129 if not_composite or not has_images:
130     for a in artists:
--> 131         a.draw(renderer)
132 else:
133     # Composite any adjacent images together
134     image_group = []

File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/matplotlib/artist.py:72, in allow_rasterization.<locals>.draw_wrapper(artist, renderer)
69     if artist.get_agg_filter() is not None:
70         renderer.start_filter()
---> 72     return draw(artist, renderer)
73 finally:
74     if artist.get_agg_filter() is not None:

File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/matplotlib/axes/_base.py:3028, in _AxesBase.draw(self, renderer)
3025     for spine in self.spines.values():
3026         artists.remove(spine)
-> 3028 self._update_title_position(renderer)
3030 if not self.axison:
3031     for _axis in self._axis_map.values():

File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/matplotlib/axes/_base.py:2963, in _AxesBase._update_title_position(self, renderer)
2960 bb = None
2961 if (ax.xaxis.get_ticks_position() in ['top', 'unknown']
2962         or ax.xaxis.get_label_position() == 'top'):
-> 2963     bb = ax.xaxis.get_tightbbox(renderer)
2964 if bb is None:
2965     if 'outline' in ax.spines:
2966         # Special case for colorbars:

File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/matplotlib/axis.py:1337, in Axis.get_tightbbox(self, renderer, for_layout_only)
1334     renderer = self.figure._get_renderer()
1335 ticks_to_draw = self._update_ticks()
-> 1337 self._update_label_position(renderer)
1339 # go back to just this axis's tick labels
1340 tlb1, tlb2 = self._get_ticklabel_bboxes(ticks_to_draw, renderer)

File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/matplotlib/axis.py:2353, in XAxis._update_label_position(self, renderer)
2349     return
2351 # get bounding boxes for this axis and any siblings
2352 # that have been set by fig.align_xlabels()
-> 2353 bboxes, bboxes2 = self._get_tick_boxes_siblings(renderer=renderer)
2355 x, y = self.label.get_position()
2356 if self.label_position == 'bottom':

File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/matplotlib/axis.py:2149, in Axis._get_tick_boxes_siblings(self, renderer)
2147 axis = getattr(ax, f"{axis_name}axis")
2148 ticks_to_draw = axis._update_ticks()
-> 2149 tlb, tlb2 = axis._get_ticklabel_bboxes(ticks_to_draw, renderer)
2150 bboxes.extend(tlb)
2151 bboxes2.extend(tlb2)

File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/matplotlib/axis.py:1316, in Axis._get_ticklabel_bboxes(self, ticks, renderer)
1314 if renderer is None:
1315     renderer = self.figure._get_renderer()
-> 1316 return ([tick.label1.get_window_extent(renderer)
1317          for tick in ticks if tick.label1.get_visible()],
1318         [tick.label2.get_window_extent(renderer)
1319          for tick in ticks if tick.label2.get_visible()])

File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/matplotlib/axis.py:1316, in <listcomp>(.0)
1314 if renderer is None:
1315     renderer = self.figure._get_renderer()
-> 1316 return ([tick.label1.get_window_extent(renderer)
1317          for tick in ticks if tick.label1.get_visible()],
1318         [tick.label2.get_window_extent(renderer)
1319          for tick in ticks if tick.label2.get_visible()])

File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/matplotlib/text.py:959, in Text.get_window_extent(self, renderer, dpi)
954     raise RuntimeError(
955         "Cannot get window extent of text w/o renderer. You likely "
956         "want to call 'figure.draw_without_rendering()' first.")
958 with cbook._setattr_cm(self.figure, dpi=dpi):
--> 959     bbox, info, descent = self._get_layout(self._renderer)
960     x, y = self.get_unitless_position()
961     x, y = self.get_transform().transform((x, y))

File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/matplotlib/text.py:378, in Text._get_layout(self, renderer)
375 ys = []
377 # Full vertical extent of font, including ascenders and descenders:
--> 378 _, lp_h, lp_d = _get_text_metrics_with_cache(
379     renderer, "lp", self._fontproperties,
380     ismath="TeX" if self.get_usetex() else False, dpi=self.figure.dpi)
381 min_dy = (lp_h - lp_d) * self._linespacing
383 for i, line in enumerate(lines):

File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/matplotlib/text.py:97, in _get_text_metrics_with_cache(renderer, text, fontprop, ismath, dpi)
94 """Call renderer.get_text_width_height_descent, caching the results."""
95 # Cached based on a copy of fontprop so that later in-place mutations of
96 # the passed-in argument do not mess up the cache.
---> 97 return _get_text_metrics_with_cache_impl(
98     weakref.ref(renderer), text, fontprop.copy(), ismath, dpi)

File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/matplotlib/text.py:105, in _get_text_metrics_with_cache_impl(renderer_ref, text, fontprop, ismath, dpi)
101 @functools.lru_cache(4096)
102 def _get_text_metrics_with_cache_impl(
103         renderer_ref, text, fontprop, ismath, dpi):
104     # dpi is unused, but participates in cache invalidation (via the renderer).
--> 105     return renderer_ref().get_text_width_height_descent(text, fontprop, ismath)

File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/matplotlib/backends/backend_agg.py:226, in RendererAgg.get_text_width_height_descent(self, s, prop, ismath)
224 _api.check_in_list(["TeX", True, False], ismath=ismath)
225 if ismath == "TeX":
--> 226     return super().get_text_width_height_descent(s, prop, ismath)
228 if ismath:
229     ox, oy, width, height, descent, font_image = \
230         self.mathtext_parser.parse(s, self.dpi, prop)

File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/matplotlib/backend_bases.py:645, in RendererBase.get_text_width_height_descent(self, s, prop, ismath)
641 fontsize = prop.get_size_in_points()
643 if ismath == 'TeX':
644     # todo: handle properties
--> 645     return self.get_texmanager().get_text_width_height_descent(
646         s, fontsize, renderer=self)
648 dpi = self.points_to_pixels(72)
649 if ismath:

File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/matplotlib/texmanager.py:368, in TexManager.get_text_width_height_descent(cls, tex, fontsize, renderer)
366 if tex.strip() == '':
367     return 0, 0, 0
--> 368 dvifile = cls.make_dvi(tex, fontsize)
369 dpi_fraction = renderer.points_to_pixels(1.) if renderer else 1
370 with dviread.Dvi(dvifile, 72 * dpi_fraction) as dvi:

File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/matplotlib/texmanager.py:300, in TexManager.make_dvi(cls, tex, fontsize)
298     with TemporaryDirectory(dir=cwd) as tmpdir:
299         tmppath = Path(tmpdir)
--> 300         cls._run_checked_subprocess(
301             ["latex", "-interaction=nonstopmode", "--halt-on-error",
302              f"--output-directory={tmppath.name}",
303              f"{texfile.name}"], tex, cwd=cwd)
304         (tmppath / Path(dvifile).name).replace(dvifile)
305 return dvifile

File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/matplotlib/texmanager.py:259, in TexManager._run_checked_subprocess(cls, command, tex, cwd)
255     report = subprocess.check_output(
256         command, cwd=cwd if cwd is not None else cls.texcache,
257         stderr=subprocess.STDOUT)
258 except FileNotFoundError as exc:
--> 259     raise RuntimeError(
260         'Failed to process string with tex because {} could not be '
261         'found'.format(command[0])) from exc
262 except subprocess.CalledProcessError as exc:
263     raise RuntimeError(
264         '{prog} was not able to process the following string:\n'
265         '{tex!r}\n\n'
(...)
272             exc=exc.output.decode('utf-8', 'backslashreplace'))
273         ) from None

RuntimeError: Failed to process string with tex because latex could not be found

---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/matplotlib/texmanager.py:255, in TexManager._run_checked_subprocess(cls, command, tex, cwd)
254 try:
--> 255     report = subprocess.check_output(
256         command, cwd=cwd if cwd is not None else cls.texcache,
257         stderr=subprocess.STDOUT)
258 except FileNotFoundError as exc:

File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/subprocess.py:415, in check_output(timeout, *popenargs, **kwargs)
413     kwargs['input'] = empty
--> 415 return run(*popenargs, stdout=PIPE, timeout=timeout, check=True,
416            **kwargs).stdout

File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/subprocess.py:493, in run(input, capture_output, timeout, check, *popenargs, **kwargs)
491     kwargs['stderr'] = PIPE
--> 493 with Popen(*popenargs, **kwargs) as process:
494     try:

File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/subprocess.py:858, in Popen.__init__(self, args, bufsize, executable, stdin, stdout, stderr, preexec_fn, close_fds, shell, cwd, env, universal_newlines, startupinfo, creationflags, restore_signals, start_new_session, pass_fds, encoding, errors, text)
855             self.stderr = io.TextIOWrapper(self.stderr,
856                     encoding=encoding, errors=errors)
--> 858     self._execute_child(args, executable, preexec_fn, close_fds,
859                         pass_fds, cwd, env,
860                         startupinfo, creationflags, shell,
864                         restore_signals, start_new_session)
865 except:
866     # Cleanup if the child failed starting.

File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/subprocess.py:1720, in Popen._execute_child(self, args, executable, preexec_fn, close_fds, pass_fds, cwd, env, startupinfo, creationflags, shell, p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite, restore_signals, start_new_session)
1719         err_msg = os.strerror(errno_num)
-> 1720     raise child_exception_type(errno_num, err_msg, err_filename)
1721 raise child_exception_type(err_msg)

FileNotFoundError: [Errno 2] No such file or directory: 'latex'

The above exception was the direct cause of the following exception:

RuntimeError                              Traceback (most recent call last)
File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/IPython/core/formatters.py:340, in BaseFormatter.__call__(self, obj)
338     pass
339 else:
--> 340     return printer(obj)
341 # Finally look for special method names
342 method = get_real_method(obj, self.print_method)

File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/IPython/core/pylabtools.py:169, in retina_figure(fig, base64, **kwargs)
160 def retina_figure(fig, base64=False, **kwargs):
161     """format a figure as a pixel-doubled (retina) PNG
162
163     If base64 is True, return base64-encoded str instead of raw bytes
(...)
167         base64 argument
168     """
--> 169     pngdata = print_figure(fig, fmt="retina", base64=False, **kwargs)
170     # Make sure that retina_figure acts just like print_figure and returns
171     # None when the figure is empty.
172     if pngdata is None:

File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/IPython/core/pylabtools.py:152, in print_figure(fig, fmt, bbox_inches, base64, **kwargs)
149     from matplotlib.backend_bases import FigureCanvasBase
150     FigureCanvasBase(fig)
--> 152 fig.canvas.print_figure(bytes_io, **kw)
153 data = bytes_io.getvalue()
154 if fmt == 'svg':

File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/matplotlib/backend_bases.py:2342, in FigureCanvasBase.print_figure(self, filename, dpi, facecolor, edgecolor, orientation, format, bbox_inches, pad_inches, bbox_extra_artists, backend, **kwargs)
2336     renderer = _get_renderer(
2337         self.figure,
2338         functools.partial(
2339             print_method, orientation=orientation)
2340     )
2341     with getattr(renderer, "_draw_disabled", nullcontext)():
-> 2342         self.figure.draw(renderer)
2344 if bbox_inches:
2345     if bbox_inches == "tight":

File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/matplotlib/artist.py:95, in _finalize_rasterization.<locals>.draw_wrapper(artist, renderer, *args, **kwargs)
93 @wraps(draw)
94 def draw_wrapper(artist, renderer, *args, **kwargs):
---> 95     result = draw(artist, renderer, *args, **kwargs)
96     if renderer._rasterizing:
97         renderer.stop_rasterizing()

File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/matplotlib/artist.py:72, in allow_rasterization.<locals>.draw_wrapper(artist, renderer)
69     if artist.get_agg_filter() is not None:
70         renderer.start_filter()
---> 72     return draw(artist, renderer)
73 finally:
74     if artist.get_agg_filter() is not None:

File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/matplotlib/figure.py:3175, in Figure.draw(self, renderer)
3172         # ValueError can occur when resizing a window.
3174 self.patch.draw(renderer)
-> 3175 mimage._draw_list_compositing_images(
3176     renderer, self, artists, self.suppressComposite)
3178 for sfig in self.subfigs:
3179     sfig.draw(renderer)

File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/matplotlib/image.py:131, in _draw_list_compositing_images(renderer, parent, artists, suppress_composite)
129 if not_composite or not has_images:
130     for a in artists:
--> 131         a.draw(renderer)
132 else:
133     # Composite any adjacent images together
134     image_group = []

File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/matplotlib/artist.py:72, in allow_rasterization.<locals>.draw_wrapper(artist, renderer)
69     if artist.get_agg_filter() is not None:
70         renderer.start_filter()
---> 72     return draw(artist, renderer)
73 finally:
74     if artist.get_agg_filter() is not None:

File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/matplotlib/axes/_base.py:3028, in _AxesBase.draw(self, renderer)
3025     for spine in self.spines.values():
3026         artists.remove(spine)
-> 3028 self._update_title_position(renderer)
3030 if not self.axison:
3031     for _axis in self._axis_map.values():

File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/matplotlib/axes/_base.py:2963, in _AxesBase._update_title_position(self, renderer)
2960 bb = None
2961 if (ax.xaxis.get_ticks_position() in ['top', 'unknown']
2962         or ax.xaxis.get_label_position() == 'top'):
-> 2963     bb = ax.xaxis.get_tightbbox(renderer)
2964 if bb is None:
2965     if 'outline' in ax.spines:
2966         # Special case for colorbars:

File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/matplotlib/axis.py:1337, in Axis.get_tightbbox(self, renderer, for_layout_only)
1334     renderer = self.figure._get_renderer()
1335 ticks_to_draw = self._update_ticks()
-> 1337 self._update_label_position(renderer)
1339 # go back to just this axis's tick labels
1340 tlb1, tlb2 = self._get_ticklabel_bboxes(ticks_to_draw, renderer)

File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/matplotlib/axis.py:2353, in XAxis._update_label_position(self, renderer)
2349     return
2351 # get bounding boxes for this axis and any siblings
2352 # that have been set by fig.align_xlabels()
-> 2353 bboxes, bboxes2 = self._get_tick_boxes_siblings(renderer=renderer)
2355 x, y = self.label.get_position()
2356 if self.label_position == 'bottom':

File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/matplotlib/axis.py:2149, in Axis._get_tick_boxes_siblings(self, renderer)
2147 axis = getattr(ax, f"{axis_name}axis")
2148 ticks_to_draw = axis._update_ticks()
-> 2149 tlb, tlb2 = axis._get_ticklabel_bboxes(ticks_to_draw, renderer)
2150 bboxes.extend(tlb)
2151 bboxes2.extend(tlb2)

File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/matplotlib/axis.py:1316, in Axis._get_ticklabel_bboxes(self, ticks, renderer)
1314 if renderer is None:
1315     renderer = self.figure._get_renderer()
-> 1316 return ([tick.label1.get_window_extent(renderer)
1317          for tick in ticks if tick.label1.get_visible()],
1318         [tick.label2.get_window_extent(renderer)
1319          for tick in ticks if tick.label2.get_visible()])

File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/matplotlib/axis.py:1316, in <listcomp>(.0)
1314 if renderer is None:
1315     renderer = self.figure._get_renderer()
-> 1316 return ([tick.label1.get_window_extent(renderer)
1317          for tick in ticks if tick.label1.get_visible()],
1318         [tick.label2.get_window_extent(renderer)
1319          for tick in ticks if tick.label2.get_visible()])

File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/matplotlib/text.py:959, in Text.get_window_extent(self, renderer, dpi)
954     raise RuntimeError(
955         "Cannot get window extent of text w/o renderer. You likely "
956         "want to call 'figure.draw_without_rendering()' first.")
958 with cbook._setattr_cm(self.figure, dpi=dpi):
--> 959     bbox, info, descent = self._get_layout(self._renderer)
960     x, y = self.get_unitless_position()
961     x, y = self.get_transform().transform((x, y))

File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/matplotlib/text.py:378, in Text._get_layout(self, renderer)
375 ys = []
377 # Full vertical extent of font, including ascenders and descenders:
--> 378 _, lp_h, lp_d = _get_text_metrics_with_cache(
379     renderer, "lp", self._fontproperties,
380     ismath="TeX" if self.get_usetex() else False, dpi=self.figure.dpi)
381 min_dy = (lp_h - lp_d) * self._linespacing
383 for i, line in enumerate(lines):

File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/matplotlib/text.py:97, in _get_text_metrics_with_cache(renderer, text, fontprop, ismath, dpi)
94 """Call renderer.get_text_width_height_descent, caching the results."""
95 # Cached based on a copy of fontprop so that later in-place mutations of
96 # the passed-in argument do not mess up the cache.
---> 97 return _get_text_metrics_with_cache_impl(
98     weakref.ref(renderer), text, fontprop.copy(), ismath, dpi)

File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/matplotlib/text.py:105, in _get_text_metrics_with_cache_impl(renderer_ref, text, fontprop, ismath, dpi)
101 @functools.lru_cache(4096)
102 def _get_text_metrics_with_cache_impl(
103         renderer_ref, text, fontprop, ismath, dpi):
104     # dpi is unused, but participates in cache invalidation (via the renderer).
--> 105     return renderer_ref().get_text_width_height_descent(text, fontprop, ismath)

File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/matplotlib/backends/backend_agg.py:226, in RendererAgg.get_text_width_height_descent(self, s, prop, ismath)
224 _api.check_in_list(["TeX", True, False], ismath=ismath)
225 if ismath == "TeX":
--> 226     return super().get_text_width_height_descent(s, prop, ismath)
228 if ismath:
229     ox, oy, width, height, descent, font_image = \
230         self.mathtext_parser.parse(s, self.dpi, prop)

File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/matplotlib/backend_bases.py:645, in RendererBase.get_text_width_height_descent(self, s, prop, ismath)
641 fontsize = prop.get_size_in_points()
643 if ismath == 'TeX':
644     # todo: handle properties
--> 645     return self.get_texmanager().get_text_width_height_descent(
646         s, fontsize, renderer=self)
648 dpi = self.points_to_pixels(72)
649 if ismath:

File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/matplotlib/texmanager.py:368, in TexManager.get_text_width_height_descent(cls, tex, fontsize, renderer)
366 if tex.strip() == '':
367     return 0, 0, 0
--> 368 dvifile = cls.make_dvi(tex, fontsize)
369 dpi_fraction = renderer.points_to_pixels(1.) if renderer else 1
370 with dviread.Dvi(dvifile, 72 * dpi_fraction) as dvi:

File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/matplotlib/texmanager.py:300, in TexManager.make_dvi(cls, tex, fontsize)
298     with TemporaryDirectory(dir=cwd) as tmpdir:
299         tmppath = Path(tmpdir)
--> 300         cls._run_checked_subprocess(
301             ["latex", "-interaction=nonstopmode", "--halt-on-error",
302              f"--output-directory={tmppath.name}",
303              f"{texfile.name}"], tex, cwd=cwd)
304         (tmppath / Path(dvifile).name).replace(dvifile)
305 return dvifile

File /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/matplotlib/texmanager.py:259, in TexManager._run_checked_subprocess(cls, command, tex, cwd)
255     report = subprocess.check_output(
256         command, cwd=cwd if cwd is not None else cls.texcache,
257         stderr=subprocess.STDOUT)
258 except FileNotFoundError as exc:
--> 259     raise RuntimeError(
260         'Failed to process string with tex because {} could not be '
261         'found'.format(command[0])) from exc
262 except subprocess.CalledProcessError as exc:
263     raise RuntimeError(
264         '{prog} was not able to process the following string:\n'
265         '{tex!r}\n\n'
(...)
272             exc=exc.output.decode('utf-8', 'backslashreplace'))
273         ) from None

RuntimeError: Failed to process string with tex because latex could not be found

<Figure size 900x260 with 3 Axes>


All-in-on convenience method#

from statsmodels.graphics.api import plot_regress_exog

with plt.rc_context({"figure.figsize":(9,6)}):
plot_regress_exog(lm2, "alc")


Exercises#

E4.LOGNORM Non-normal error term#

from scipy.stats import uniform, lognorm
np.random.seed(42)
xs = uniform(0,10).rvs(100)
ys = 2*xs + lognorm(1).rvs(100)
df = pd.DataFrame({"x":xs, "y":ys})
lm = smf.ols("y ~ x", data=df).fit()

qqplot(lm.resid, line="s");

# sns.scatterplot(x=xs, y=lm.resid);

# from ministats import plot_lm_scale_loc
# plot_lm_scale_loc(lm);


E4.NL Nonlinear relationship#

from scipy.stats import uniform, lognorm

np.random.seed(42)
xs = uniform(0,10).rvs(100)
ys = 2*xs + xs**2 + norm(0,1).rvs(100)
df = pd.DataFrame({"x":xs, "y":ys})
lm = smf.ols("y ~ 1 + x", data=df).fit()
lm.params

Intercept   -14.684585
x            11.688188
dtype: float64

from ministats import plot_reg
plot_reg(lm);

plot_resid(lm, lowess=True);

qqplot(lm.resid, line="s");


E4.H Heteroskedasticity#

from scipy.stats import uniform, norm

np.random.seed(43)
xs = np.sort(uniform(0,10).rvs(100))
sigmas = np.linspace(1,20,100)
ys = 2*xs + norm(loc=0,scale=sigmas).rvs(100)
df = pd.DataFrame({"x":xs, "y":ys})
lm = smf.ols("y ~ x", data=df).fit()

from ministats import plot_scaleloc
plot_scaleloc(lm);


E4.v Collinearity#

from scipy.stats import uniform, norm

np.random.seed(45)
x1s = uniform(0,10).rvs(100)
alpha = 0.8
x2s = alpha*x1s + (1-alpha)*uniform(0,10).rvs(100)
ys = 2*x1s + 3*x2s + norm(0,1).rvs(100)
df = pd.DataFrame({"x1":x1s, "x2":x2s, "y":ys})
lm = smf.ols("y ~ x1 + x2", data=df).fit()
lm.params, lm.bse

(Intercept    0.220927
x1           1.985774
x2           3.006225
dtype: float64,
Intercept    0.253988
x1           0.137253
x2           0.167492
dtype: float64)

lm.condition_number

24.291898486251146

calc_lm_vif(lm, "x1"), calc_lm_vif(lm, "x2")

(17.17244980334442, 17.17244980334442)


CUT MATERIAL#

Example dataset#

mtcars = sm.datasets.get_rdataset("mtcars", "datasets").data
mtcars

lmcars = smf.ols("mpg ~ hp", data=mtcars).fit()

sns.scatterplot(x=mtcars["hp"], y=mtcars["mpg"])
sns.lineplot(x=mtcars["hp"], y=lmcars.fittedvalues);


Compute the variance/covariance matrix#

lm2.cov_params()
# == lm2.scale * np.linalg.inv(X.T @ X)