Section 1.1 — Introduction to data
This notebook contains all the code from Section 1.1, "Introduction to data," of the No Bullshit Guide to Statistics.
Random selection
In [1]:
Copied!
import random

# Sampling frame: the ID numbers of the ten individuals we can select from.
samplingframe = list(range(1, 11))

# Draw three distinct individuals at random (sampling without replacement).
random.sample(samplingframe, k=3)
import random

# Sampling frame: the ID numbers of the ten individuals we can select from.
samplingframe = list(range(1, 11))

# Draw three distinct individuals at random (sampling without replacement).
random.sample(samplingframe, k=3)
Out[1]:
[9, 3, 2]
Random assignment
In [2]:
Copied!
def flip_coin():
    """
    Randomly assign one participant to a study group by a fair coin flip.

    Draws a uniform random number in [0, 1) from Python's global random
    number generator and prints "intervention" if it is below 0.5,
    otherwise prints "control". The two outcomes are equally likely.

    Returns:
        None. The assignment is only printed, not returned.
    """
    r = random.random()
    if r < 0.5:
        print("intervention")
    else:
        print("control")


# Assign a single participant.
flip_coin()
def flip_coin():
    """
    Randomly assign one participant to a study group by a fair coin flip.

    Draws a uniform random number in [0, 1) from Python's global random
    number generator and prints "intervention" if it is below 0.5,
    otherwise prints "control". The two outcomes are equally likely.

    Returns:
        None. The assignment is only printed, not returned.
    """
    r = random.random()
    if r < 0.5:
        print("intervention")
    else:
        print("control")


# Assign a single participant.
flip_coin()
intervention
In [3]:
Copied!
import random

# Fix the seed of Python's global random number generator so that the
# random draws made after this point are the same on every run.
random.seed(a=22)
import random

# Fix the seed of Python's global random number generator so that the
# random draws made after this point are the same on every run.
random.seed(a=22)
In [4]:
Copied!
# Simple random sample: 20 of the 100 individuals, numbered 1 through 100.
pop1 = range(1, 101)
srs = random.sample(pop1, k=20)
srs.sort()  # show the selected IDs in increasing order
print(srs)
# Simple random sample: 20 of the 100 individuals, numbered 1 through 100.
pop1 = range(1, 101)
srs = random.sample(pop1, k=20)
srs.sort()  # show the selected IDs in increasing order
print(srs)
[4, 7, 11, 16, 18, 23, 24, 30, 32, 35, 41, 45, 58, 71, 77, 79, 84, 88, 90, 95]
In [5]:
Copied!
# Stratified sampling: the population is split into 5 strata of 20
# individuals each (numbered 1 through 20 within every stratum).
strata = [range(1, 21) for _ in range(5)]

# Draw 4 individuals at random from each stratum and show them in order.
for stratum in strata:
    print(sorted(random.sample(stratum, 4)))
# Stratified sampling: the population is split into 5 strata of 20
# individuals each (numbered 1 through 20 within every stratum).
strata = [range(1, 21) for _ in range(5)]

# Draw 4 individuals at random from each stratum and show them in order.
for stratum in strata:
    print(sorted(random.sample(stratum, 4)))
[1, 2, 9, 14] [6, 7, 10, 14] [2, 4, 17, 19] [6, 9, 11, 20] [5, 10, 13, 17]
In [6]:
Copied!
# Cluster sampling: pick 2 of the 10 clusters, numbered 1 through 10.
clusters = range(1, 11)
chosen_clusters = random.sample(clusters, k=2)
print(sorted(chosen_clusters))
# Cluster sampling: pick 2 of the 10 clusters, numbered 1 through 10.
clusters = range(1, 11)
chosen_clusters = random.sample(clusters, k=2)
print(sorted(chosen_clusters))
[5, 10]