bayesian_mcmc.py
"""
Demonstration of Bayesian inference via Markov chain Monte Carlo (MCMC).

Bayesian MCMC is a technique for approximating the posterior distribution
over the parameters of a data-generating process given a data set and,
optionally, a prior distribution over the parameters. This toy problem
demonstrates the idea with a 2D Gaussian distribution. However, Bayesian
MCMC is most useful when the parameter space is high-dimensional and the
posterior has no analytic form.
"""
import itertools as it

import matplotlib.pyplot as plt
import numpy as np
import numpy.linalg as LA
# Metropolis algorithm, a simple MCMC sampling procedure. Implemented as a
# generator function; use itertools.islice to draw a finite number of samples.
def metropolis(log_pdf, sampler, x0):
    x = x0
    log_px = log_pdf(x)  # cache the current log density instead of recomputing
    while True:
        xp = sampler(x)  # propose a move
        log_pxp = log_pdf(xp)
        # accept with probability min(1, p(xp) / p(x)), computed in log space
        if np.log(np.random.uniform()) < log_pxp - log_px:
            x, log_px = xp, log_pxp
        yield x
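# Minimal usage sketch (illustrative, not part of the demo below): sample a
# 1D standard normal with a Gaussian random-walk proposal.
#
#   chain = np.array(list(it.islice(
#       metropolis(lambda x: -0.5 * float(x @ x),  # log N(0, 1), up to a constant
#                  lambda x: x + 0.5 * np.random.normal(size=1),
#                  np.zeros(1)),
#       1000)))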
# Create a function that computes the Gaussian log probability density of
# each point in a data set (points are rows).
def gaussian_log_pdf(mu, sigma):
    mu = np.array(mu)
    sinv = LA.inv(sigma)
    # log normalization constant: -0.5 * log det(2*pi*sigma)
    log_scl = -0.5 * np.log(LA.det(2 * np.pi * sigma))
    def logpdf(x):
        d = x - mu
        # row-wise quadratic form d @ sigma^-1 @ d
        prod = np.sum(np.dot(d, sinv) * d, axis=1)
        return log_scl - 0.5 * prod
    return logpdf
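# Optional sanity check (illustrative sketch; scipy is an extra dependency
# not used elsewhere in this script): the closure should agree with
# scipy.stats.multivariate_normal.
#
#   from scipy.stats import multivariate_normal
#   pts = np.random.normal(size=(5, 2))
#   ours = gaussian_log_pdf([0, 0], np.eye(2))(pts)
#   ref = multivariate_normal([0, 0], np.eye(2)).logpdf(pts)
#   assert np.allclose(ours, ref)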
# main
def metro_bayes():
    # the true parameters of the data
    mu = np.array([1, 2])
    sigma = np.diag([0.05, 1])
    # generate the data: rows are samples, so multiply standard normals by
    # L.T, where sigma = L @ L.T (for this diagonal sigma, L.T equals L)
    kd = 200
    data = np.dot(np.random.normal(size=(kd, 2)), LA.cholesky(sigma).T) + mu
    # Log probability of the (mu, sigma) parameters given the data. With the
    # implicit flat prior this is just the log likelihood; a log-prior term
    # could also be added here if desired.
    def log_pdf(params):
        # reject non-positive variances: -inf is never accepted by Metropolis
        if np.any(params[2:] <= 0.0001):
            return -np.inf
        g = gaussian_log_pdf(params[:2], np.diag(params[2:]))
        logp = np.sum(g(data))
        return logp
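    # Sketch of adding a prior (illustrative, not used here): log_pdf above
    # could add a log-prior term to logp before returning, e.g. independent
    # N(0, 100) priors on all four parameters:
    #
    #   logp += -0.5 * np.sum(params ** 2) / 100.0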
    # Proposal function that takes a random-walk step in parameter space.
    # The step size must be tuned to the number of data points (kd above):
    # with more data, the posterior over the parameters is sharper, so the
    # step must be smaller; otherwise Metropolis will almost never accept a
    # move once it reaches a high-probability region.
    def sampler(x):
        return x + 0.01 * np.random.normal(size=4)
    # initial guess: standard normal parameters
    x0 = np.array([0.0, 0.0, 1.0, 1.0])
    k = 10000
    m = np.vstack(list(it.islice(metropolis(log_pdf, sampler, x0), k)))
    # discard the first chunk of samples as MCMC burn-in
    m = m[2000:, :]
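    # A rough tuning diagnostic (illustrative sketch, not part of the original
    # script): the fraction of iterations where the chain moved approximates
    # the Metropolis acceptance rate, commonly targeted around 0.2-0.5 for
    # random-walk proposals.
    #
    #   accept_rate = np.mean(np.any(np.diff(m, axis=0) != 0, axis=1))
    #   print("approximate acceptance rate:", accept_rate)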
    # Histogram the marginal posterior distribution of each parameter.
    names = (r"$\mu_0$", r"$\mu_1$", r"$\sigma_0$", r"$\sigma_1$")
    true_vals = np.concatenate([mu, np.diag(sigma)])
    for i in range(4):
        plt.subplot(2, 2, i + 1)
        plt.hist(m[:, i], label="MCMC posterior")
        # label each artist directly so the legend order is unambiguous
        plt.axvline(x=true_vals[i], color=(1, 0, 0), label="true value")
        plt.legend()
        plt.xlabel(names[i], fontsize=16)
        plt.ylabel("count")
    plt.show()
if __name__ == "__main__":
    metro_bayes()