Section 1.1 — Introduction to data

Section 1.1 — Introduction to data

This notebook contains all the code from Section 1.1, "Introduction to data", of the No Bullshit Guide to Statistics.

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

Random selection

import random

# Sampling frame: the ten individuals available for selection, labelled 1-10.
samplingframe = list(range(1, 11))
# Draw 3 distinct individuals at random; no seed is set here, so the
# selection changes from run to run.
random.sample(samplingframe, 3)
[5, 1, 7]

Random assignment

def flip_coin():
    """Randomly assign a participant to a study group and print the result.

    Draws one number from ``random.random()`` and prints ``"intervention"``
    when it is below 0.5, otherwise ``"control"``. Nothing is returned.
    """
    group = "intervention" if random.random() < 0.5 else "control"
    print(group)

# Assign one participant: prints either "intervention" or "control".
flip_coin()
control

Discussion

Random sampling alternatives

import random

# Fix the seed so the random draws that follow are reproducible.
random.seed(22)

# Simple random sample: 20 of the 100 individuals, numbered 1 to 100.
pop1 = range(1, 101)
simple_sample = random.sample(pop1, 20)
print(sorted(simple_sample))
[4, 7, 11, 16, 18, 23, 24, 30, 32, 35, 41, 45, 58, 71, 77, 79, 84, 88, 90, 95]
# Stratified sampling: split the 100 individuals into 5 non-overlapping
# strata of 20 (1-20, 21-40, ..., 81-100) and draw 4 from each stratum.
# Numbering the strata consecutively keeps every sampled ID unique, so a
# drawn number identifies exactly one member of the population used above.
strata = [range(20 * s + 1, 20 * s + 21) for s in range(5)]
for stratum in strata:
    print(sorted(random.sample(stratum, 4)))
[1, 2, 9, 14]
[26, 27, 30, 34]
[42, 44, 57, 59]
[66, 69, 71, 80]
[85, 90, 93, 97]
# Cluster sampling: pick 2 of the 10 clusters (numbered 1-10) at random.
clusters = range(1, 11)
chosen_clusters = random.sample(clusters, 2)
print(sorted(chosen_clusters))
[5, 10]