# IPython notebook magics: load the watermark extension and stamp the
# author, date, and Python version for reproducibility.
%load_ext watermark
%watermark --author "Ryan Sloot |" -d -v
# Course helper module (provides flip_fair_coin, flip_fair_coins,
# plot_discrete_histogram, sample_from_finite_probability_space, ...).
from comp_prob_inference import *
%matplotlib inline
# One fair-coin flip, then 100 flips plotted as a count histogram
# and again as a relative-frequency histogram.
flip_fair_coin()
flips = flip_fair_coins(100)
plot_discrete_histogram(flips)
plt.show()
plot_discrete_histogram(flips, frequency=True)
## 100,000 flips
# Law of large numbers demo: the running fraction of heads should
# converge to 1/2 as the number of flips grows.
n = 100000
heads_so_far = 0
fraction_of_heads = []
for flip_number in range(1, n + 1):
    if flip_fair_coin() == 'heads':
        heads_so_far += 1
    fraction_of_heads.append(heads_so_far / flip_number)
plt.figure(figsize=(8, 4))
plt.plot(range(1, n + 1), fraction_of_heads)
plt.xlabel('Number of Flips')
plt.ylabel('Fraction of heads')
plt.show()
# A finite probability model as a dict: outcome -> probability.
model = dict(heads=1 / 2, tails=1 / 2)
sample_space = set(model)  # iterating a dict yields its keys
model['heads']
sample_space
# Outcomes can be any hashable objects, not just strings.
model = {1: 0.4, 2: 0.3, 'cat': 0.3}
set(model)
# Fair six-sided die.
{face: 1 / 6 for face in range(1, 7)}
# Joint sample space for two independent fair dice: 36 equally
# likely ordered pairs.
two_die_sample_space = {
    (first, second): 1 / 36
    for first in range(1, 7)
    for second in range(1, 7)
}
# Events are subsets of the sample space of two coin flips.
sample_space = {'HH', 'HT', 'TH', 'TT'}
A = {'HT', 'TT'}        # second flip is tails
B = {'HH', 'HT', 'TH'}  # at least one heads
C = {'HH'}              # both flips heads
# Set operators: & is intersection, | is union, - is difference.
A_intersect_B = A & B
A_union_C = A | C
B_complement = sample_space - B
print(A_intersect_B, A_union_C, B_complement)
def prob_of_event(event, prob_space):
    """Return the probability of `event` under `prob_space`.

    `event` is an iterable of outcomes; `prob_space` maps each outcome
    to its probability. The event's probability is the sum of the
    probabilities of its outcomes.
    """
    return sum(prob_space[outcome] for outcome in event)
# Example: P(rainy or snowy) = 1/6 + 1/3 = 1/2 under a three-outcome
# weather model.
prob_space = dict(sunny=1 / 2, rainy=1 / 6, snowy=1 / 3)
rainy_or_snowy_event = {'rainy', 'snowy'}
print(prob_of_event(rainy_or_snowy_event, prob_space))
If a sample space $\Omega$ has size $|\Omega| = m$, then there are $2^m$ possible events (one for each subset of $\Omega$)
Rolling two six-sided dice, what is the event that the sum of the faces is 7?
# Group the 36 equally likely ordered rolls of two dice by face sum.
space = {}
rolls = [(i, j) for i in range(1, 7) for j in range(1, 7)]
for roll in rolls:
    space.setdefault(sum(roll), []).append(roll)
space
# P(sum) = (#rolls with that sum) / 36.
counts = {total: len(outcomes) for total, outcomes in space.items()}
space_ = {total: count / sum(counts.values())
          for total, count in counts.items()}
assert space_[7] == 1/6, "Wrong"
print('Passed')
set(space[7])
# Draw one outcome; W is the weather itself, I indicates "sunny".
prob_space = {'sunny': 1 / 2, 'rainy': 1 / 6, 'snowy': 1 / 3}
random_outcome = sample_from_finite_probability_space(prob_space)
W = random_outcome
I = 1 if random_outcome == 'sunny' else 0
print('I: %d, W: %s' % (I,W))
## mapping I and W r.v.s
# A random variable is a mapping from outcomes to values:
# W is the identity map, I maps "sunny" to 1 and everything else to 0.
W_mapping = dict(zip(prob_space.keys(), prob_space.keys()))
I_mapping = {'sunny': 1, 'rainy': 0, 'snowy': 0}
random_outcome = sample_from_finite_probability_space(prob_space)
W = W_mapping[random_outcome]
I = I_mapping[random_outcome]
print('I: %d, W: %s' % (I,W))
## stored as probability tables
# Equivalent representation: store the distribution of each random
# variable directly and sample from it.
W_table = dict(prob_space)
I_table = {0: 1 / 2, 1: 1 / 2}
W = sample_from_finite_probability_space(W_table)
I = sample_from_finite_probability_space(I_table)
print('I: %d, W: %s' % (I,W))
# Random variable X on a pet sample space: X maps each outcome to a
# number (cat -> 5, dog -> 5, shark -> 7).
prob_space = dict(cat=0.2, dog=0.7, shark=0.1)
X = dict(cat=5, dog=5, shark=7)
space_
# Distribution of W, and the derived distribution of W**2: each value
# is squared while its probability is carried over unchanged.
f_W = {3: 2 / 3, 42: 1 / 3}
f_W_sq = {value ** 2: prob for value, prob in f_W.items()}
f_W_sq
## bad example might be representing as dict
# Joint distribution over (weather, temperature), keyed by tuples.
prob_table = {
    ('sunny', 'hot'): 3 / 10,
    ('sunny', 'cold'): 1 / 5,
    ('rainy', 'hot'): 1 / 30,
    ('rainy', 'cold'): 2 / 15,
    ('snowy', 'hot'): 0,
    ('snowy', 'cold'): 1 / 3,
}
## W=rainy T=cold
prob_table['rainy', 'cold']
## better nested dictionaries
# Same joint table, stored as weather -> {temperature -> probability}.
weather = {'sunny', 'rainy', 'snowy'}
prob_W_T_dict = {w: {} for w in weather}
## and assign each
prob_W_T_dict['sunny'].update(hot=3 / 10, cold=1 / 5)
prob_W_T_dict['rainy'].update(hot=1 / 30, cold=2 / 15)
prob_W_T_dict['snowy'].update(hot=0, cold=1 / 3)
# Pretty-print the nested-dict joint table (course helper from
# comp_prob_inference).
print_joint_prob_table_dict(prob_W_T_dict)
## W=rainy T=cold
# Lookup is a simple two-level index into the nested dict.
prob_W_T_dict['rainy']['cold']
## better? 2d array
import numpy as np
# Joint table as a 2-D array plus row/column label lists: rows are
# weather states, columns are temperatures.
prob_W_T_rows = ['sunny', 'rainy', 'snowy']
prob_W_T_cols = ['hot', 'cold']
prob_W_T_array = np.array([
    [3 / 10, 1 / 5],   # sunny
    [1 / 30, 2 / 15],  # rainy
    [0, 1 / 3],        # snowy
])
# Pretty-print the array-backed joint table (course helper from
# comp_prob_inference).
print_joint_prob_table_array(prob_W_T_array, prob_W_T_rows,
prob_W_T_cols)
## W=rainy T=cold
# Cell lookup: convert labels to indices with list.index(); note this
# is an O(n) scan each time (a label->index dict is built next).
prob_W_T_array[prob_W_T_rows.index('rainy'),
prob_W_T_cols.index('cold')]
## map to index
# Precompute label -> index dicts so lookups avoid repeated
# list.index() scans.
prob_W_T_row_mapping = {}
for idx, label in enumerate(prob_W_T_rows):
    prob_W_T_row_mapping[label] = idx
prob_W_T_col_mapping = {}
for idx, label in enumerate(prob_W_T_cols):
    prob_W_T_col_mapping[label] = idx
## W=w T=t
w = 'rainy'
t = 'cold'
prob_W_T_array[prob_W_T_row_mapping[w], prob_W_T_col_mapping[t]]
# X maps each weather outcome to a probability value; the initial
# per-key placeholders are immediately overwritten below.
X = dict.fromkeys(weather)
X['sunny'] = 1 / 2
X['rainy'] = 1 / 6
X['snowy'] = 1 / 3
X
# Y is a fair indicator variable.
Y = {1: 1 / 2, 0: 1 / 2}
Y
# Berkeley admissions example; the module provides joint_prob_table
# plus gender/department/admission label and index mappings.
from simpsons_paradox_data import *
'''
example p_(G,D,A)(female, C, admitted)
G: gender, D: department applied to (A-F), A: admitted or rejected
'''
# prob women applied to department C and admitted
# joint_prob_table appears to be indexed [gender, department,
# admission] — consistent with the mappings used below.
joint_prob_table[gender_mapping['female'],
department_mapping['C'],
admission_mapping['admitted']]
# Marginalize: sum over the department axis to get the joint p_(G,A).
joint_prob_gender_admission = joint_prob_table.sum(axis=1)
## dropping department (axis 1)
joint_prob_gender_admission
## female apply and admitted
joint_prob_gender_admission[gender_mapping['female'],
admission_mapping['admitted']]
## conditioning: admission given applicant is female
# Conditioning = take the row for that gender and renormalize it so
# it sums to 1.
female_row = joint_prob_gender_admission[gender_mapping['female']]
prob_admission_given_female = female_row / female_row.sum()
## as dict
prob_admission_given_female_dict = dict(
    zip(admission_labels, prob_admission_given_female))
prob_admission_given_female_dict
## admission given male
male_row = joint_prob_gender_admission[gender_mapping['male']]
admission_given_male = dict(
    zip(admission_labels, male_row / male_row.sum()))
admission_given_male
# Condition the other way: P(G | A = admitted) from the "admitted"
# column, renormalized.
admitted_column = joint_prob_gender_admission[:,
                                              admission_mapping['admitted']]
prob_gender_given_admitted = admitted_column / admitted_column.sum()
prob_gender_given_admitted_dict = dict(
    zip(gender_labels, prob_gender_given_admitted))
print(prob_gender_given_admitted_dict)
# P(admitted | female, department A): restrict the full joint table
# to (female, A) and renormalize over admission outcomes.
female_and_A_only = joint_prob_table[gender_mapping['female'],
                                     department_mapping['A']]
female_and_A_only[admission_mapping['admitted']] / female_and_A_only.sum()
# print(department_labels)
def gender_dep_mapper(g, d, a):
    """P(A = a | G = g, D = d): restrict the joint table to the
    (gender, department) pair and renormalize over admissions."""
    restricted = joint_prob_table[gender_mapping[g], department_mapping[d]]
    return restricted[admission_mapping[a]] / restricted.sum()
# P(admitted | department, gender) for every department, per gender —
# this is where Simpson's paradox shows up department by department.
female_admitted_given_d_and_g = {d: gender_dep_mapper('female', d, 'admitted')
                                 for d in department_labels}
male_admitted_given_d_and_g = {d: gender_dep_mapper('male', d, 'admitted')
                               for d in department_labels}
# NOTE(review): removed the unused accumulators `fem, mal = [], []` —
# nothing was ever appended to them.
for dept in department_labels:
    f = female_admitted_given_d_and_g[dept]
    m = male_admitted_given_d_and_g[dept]
    print('%s: female: %.4f | male: %.4f' % (dept, f, m))
def pmf(x,y):
    """Unnormalized joint pmf helper.

    For (x, y) inside the support ({1, 2, 4} x {1, 3}) return the pair
    [(x, y), x**2 + y**2]; for any point outside the support return 0.
    """
    supported_x = {1, 2, 4}
    supported_y = {1, 3}
    # Guard clause: outside the support the weight is zero.
    if x not in supported_x or y not in supported_y:
        return 0
    return [(x, y), x ** 2 + y ** 2]
# Collect the support and unnormalized weights of the joint pmf over
# the candidate grid xs x ys.
xs = {1, 2, 3, 4}
ys = {1, 2, 3}
p = {}
for x in xs:
    for y in ys:
        entry = pmf(x, y)
        if entry:
            key, weight = entry
            p[key] = weight
# Normalizing constant: the total weight over the support.
c = sum(p.values())
print(c)
p.items()
## P(Y<X)
# Sum the weights of outcomes where y < x, then divide by the total 72.
sum(weight for (x_val, y_val), weight in p.items() if y_val < x_val) / 72
## P(Y=3)
sum(weight for (x_val, y_val), weight in p.items() if y_val == 3) / 72
p
# Normalize the raw weights into a proper joint pmf.
pmf_p = {outcome: weight / 72 for outcome, weight in p.items()}
pmf_p
# Marginal pmf of X: for each candidate x, sum pmf_p over all y
# (an x outside the support gets probability 0).
px = {}
for x in xs:
    px[x] = sum(prob for (x_val, _), prob in pmf_p.items() if x_val == x)
print(px, sum(px.values()))
# Marginal pmf of Y: for each candidate y, sum pmf_p over all x
# (a y outside the support gets probability 0).
py = {}
for y in ys:
    py[y] = sum(prob for (_, y_val), prob in pmf_p.items() if y_val == y)
print(py, sum(py.values()))
A nice intro to probability using Python here, going through probability spaces, random variables (joint/disjoint), conditioning, PMFs, Bayes' rule, and the total probability theorem. Fairly basic stuff, but made sort of awesome with Python! I don't understand why probability isn't always taught through programming: it eliminates tedious summing and integrating with PMFs and CDFs, and tedious counting problems; with dictionaries you immediately get to see (small) sample spaces visually, and it helps you focus more on the underlying theories. Up next with probability: independence and structure in distributions, measures of randomness (entropy), and information divergence and mutual information. Followed by a movie-recommendations side project I'm finishing.