Section 1.1 — Introduction to data

Section 1.1 — Introduction to data

This notebook contains all the code from Section 1.1, "Introduction to data", of the No Bullshit Guide to Statistics.

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import random
# Sampling frame: the ten individuals labelled 1 through 10.
samplingframe = list(range(1, 11))
# Draw 3 distinct individuals at random (sampling without replacement).
random.sample(samplingframe, k=3)

[3, 2, 8]

def flip_coin():
    """Print a randomly chosen group assignment.

    Draws one number from ``random.random()`` and prints "intervention"
    when the draw is below 0.5, otherwise prints "control".
    """
    group = "intervention" if random.random() < 0.5 else "control"
    print(group)

flip_coin()

control

import random
# Fix the seed so the random draws below can be reproduced.
random.seed(22)

# Simple random sample: 20 of the 100 individuals labelled 1..100.
pop1 = range(1, 101)
simple_sample = random.sample(pop1, 20)
simple_sample.sort()
print(simple_sample)

[4, 7, 11, 16, 18, 23, 24, 30, 32, 35, 41, 45, 58, 71, 77, 79, 84, 88, 90, 95]

# Stratified sample: 5 strata of 20 individuals, 4 drawn from each.
# Every stratum uses the same labels 1..20.
strata = [range(1, 21) for _ in range(5)]
for stratum in strata:
    stratum_sample = random.sample(stratum, 4)
    stratum_sample.sort()
    print(stratum_sample)

[1, 2, 9, 14]
[6, 7, 10, 14]
[2, 4, 17, 19]
[6, 9, 11, 20]
[5, 10, 13, 17]

# Cluster sample: pick 2 of the 10 clusters labelled 1..10.
clusters = range(1, 11)
cluster_sample = random.sample(clusters, 2)
cluster_sample.sort()
print(cluster_sample)

[5, 10]