Comparison: Logistic Regression

Here, the InferPy code is compared with that of other similar frameworks, namely TensorFlow Probability with the Edward 2 interface (TFP for short) and Pyro. A logistic regression model is considered.

Setting up

First, the required packages are imported. The variable d is the number of predictive attributes, while N is the number of observations. Throughout this comparison, the code fragments are given first for InferPy, then for TFP and finally for Pyro; note that each example uses its own value of N.

import inferpy as inf
import numpy as np
import tensorflow as tf

d = 2
N = 10000
from tensorflow_probability import edward2 as ed
import tensorflow as tf

d = 2
N = 50000
import torch
import pyro
from pyro.distributions import Normal, Binomial
from pyro.infer import SVI, Trace_ELBO
from pyro.optim import Adam
from pyro.contrib.autoguide import AutoDiagonalNormal

d = 2
N = 1000

Model definition

Models are defined as functions. In the case of InferPy, these must be decorated with @inf.probmodel. Inspired by Pyro, InferPy uses the inf.datamodel construct to simplify the definition of the variables' dimensions. In the following code fragments, the P and Q models are defined.

@inf.probmodel
def log_reg(d):
    w0 = inf.Normal(0., 1., name="w0")
    w = inf.Normal(np.zeros([d, 1]), np.ones([d, 1]), name="w")

    with inf.datamodel():
        x = inf.Normal(np.zeros(d), 2., name="x")  # the scale is broadcasted to shape [d] because of loc
        y = inf.Bernoulli(logits=w0 + x @ w, name="y")


@inf.probmodel
def qmodel(d):
    qw0_loc = inf.Parameter(0., name="qw0_loc")
    qw0_scale = tf.math.softplus(inf.Parameter(1., name="qw0_scale"))
    qw0 = inf.Normal(qw0_loc, qw0_scale, name="w0")

    qw_loc = inf.Parameter(tf.zeros([d, 1]), name="qw_loc")
    qw_scale = tf.math.softplus(inf.Parameter(tf.ones([d, 1]), name="qw_scale"))
    qw = inf.Normal(qw_loc, qw_scale, name="w")
def log_reg(d, N, w_init=(1, 1), x_init=(0, 1)):
    w = ed.Normal(loc=tf.ones([d], dtype="float32") * w_init[0], scale=1. * w_init[1], name="w")
    w0 = ed.Normal(loc=1. * w_init[0], scale=1. * w_init[1], name="w0")

    x = ed.Normal(loc=tf.ones([N, d], dtype="float32") * x_init[0], scale=1. * x_init[1], name="x")
    y = ed.Bernoulli(logits=tf.tensordot(x, w, axes=[[1], [0]]) + w0, name="y")

    return x, y, (w, w0)


def qmodel(d, N):
    qw_loc = tf.Variable(tf.ones([d]))
    qw_scale = tf.math.softplus(tf.Variable(tf.ones([d])))
    qw0_loc = tf.Variable(1.)
    qw0_scale = tf.math.softplus(tf.Variable(1.))

    qw = ed.Normal(loc=qw_loc, scale=qw_scale, name="qw")
    qw0 = ed.Normal(loc=qw0_loc, scale=qw0_scale, name="qw0")
    return qw, qw0
def log_reg(x_data=None, y_data=None):
    w = pyro.sample("w", Normal(torch.zeros(d), torch.ones(d)))
    w0 = pyro.sample("w0", Normal(0., 1.))

    with pyro.plate("map", N):
        x = pyro.sample("x", Normal(torch.zeros(d), 2).to_event(1), obs=x_data)
        logits = w0 + x @ w  # x has shape (N, d) and w has shape (d,), so logits has shape (N,)
        y = pyro.sample("pred", Binomial(logits=logits), obs=y_data)

    return x,y


qmodel = AutoDiagonalNormal(log_reg)
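
As an illustrative aside (not part of the original comparison), the shapes involved in the logits computation can be checked with plain NumPy: w0 is a scalar, x has one row per observation, and the matrix product (or tensordot) produces one logit per observation.

# Hypothetical shape check of the logits computation (NumPy only, illustrative sizes)
import numpy as np

N_demo, d_demo = 5, 2
x_demo = np.zeros([N_demo, d_demo])   # one row per observation
w_demo = np.ones([d_demo, 1])         # column vector of weights, as in the InferPy model
w0_demo = 0.

logits_demo = w0_demo + x_demo @ w_demo
print(logits_demo.shape)              # (5, 1): one logit per observation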

Sample from the prior model

Now we can sample from the P-model, fixing the global parameters to specific values. As can be observed below, this is more complex in TFP, where an interceptor that sets the values of the variables must be defined.

# instance of the model
m = log_reg(d)

# create toy data
data = m.prior(["x", "y"], data={"w0": 0, "w": [[2], [1]]}).sample(N)
x_train = data["x"]
y_train = data["y"]
def set_values(**model_kwargs):
    """Creates a value-setting interceptor."""

    def interceptor(f, *args, **kwargs):
        """Sets random variable values to its aligned value."""
        name = kwargs.get("name")
        if name in model_kwargs:
            kwargs["value"] = model_kwargs[name]
        else:
            print(f"set_values not interested in {name}.")
        return ed.interceptable(f)(*args, **kwargs)

    return interceptor


with ed.interception(set_values(w=[2, 1], w0=0)):
    generate = log_reg(d, N, x_init=(2, 10))

with tf.Session() as sess:
    x_train, y_train, _ = sess.run(generate)
sampler = pyro.condition(log_reg, data={"w0": torch.tensor(0.), "w": torch.tensor([2., 1.])})  # conditioning values must be tensors
x_train, y_train = sampler()

Inference

Using the generated data, variational inference can be performed as follows. This is quite simple with our package, while TFP requires the user to define the ELBO and implement the optimization loop, and Pyro requires an explicit loop of SVI steps.

VI = inf.inference.VI(qmodel(d), epochs=10000)
m.fit({"x": x_train, "y": y_train}, VI)
qw, qw0 = qmodel(d, N)

with ed.interception(set_values(w=qw, w0=qw0, x=x_train, y=y_train)):
    post_x, post_y, (post_w, post_w0) = log_reg(d, N)

energy = tf.reduce_sum(post_x.distribution.log_prob(post_x.value)) + \
         tf.reduce_sum(post_y.distribution.log_prob(y_train)) + \
         tf.reduce_sum(post_w.distribution.log_prob(qw.value)) + \
         tf.reduce_sum(post_w0.distribution.log_prob(qw0.value))

entropy = -(tf.reduce_sum(qw.distribution.log_prob(qw.value)) + \
            tf.reduce_sum(qw0.distribution.log_prob(qw0.value)))


# ELBO definition
elbo = energy + entropy


# Optimization loop
optimizer = tf.train.AdamOptimizer(learning_rate=0.05)
train = optimizer.minimize(-elbo)

init = tf.global_variables_initializer()

t = []
num_epochs = 10000
with tf.Session() as sess:
    sess.run(init)

    for i in range(num_epochs):
        sess.run(train)
        if i % 5 == 0:
            t.append(sess.run([elbo]))

            if i % 50 == 0:
                print(sess.run(elbo))

    w_post = sess.run(qw.distribution.loc)
    w0_post = sess.run(qw0.distribution.loc)
optim = Adam({"lr": 0.1})
svi = SVI(log_reg, qmodel, optim, loss=Trace_ELBO(), num_samples=10)


num_iterations = 10000
pyro.clear_param_store()
for j in range(num_iterations):
    # calculate the loss and take a gradient step
    loss = svi.step(x_train, y_train)
    print("[iteration %04d] loss: %.4f" % (j + 1, loss / len(x_train)))

Usage of the inferred model

Finally, the posterior distributions of the global parameters w and w0 can be inspected. From the posterior predictive distribution, samples can be generated as follows.

# Print the parameters
w_post = m.posterior("w").parameters()["loc"]
w0_post = m.posterior("w0").parameters()["loc"]

print(w_post, w0_post)

# Sample from the posterior
post_sample = m.posterior_predictive(["x", "y"], data={"w": w_post, "w0": w0_post}).sample()
x_gen = post_sample["x"]
y_gen = post_sample["y"]

print(x_gen, y_gen)
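
Since the toy data was generated with w = [[2], [1]] and w0 = 0, a simple (hypothetical) check is that the posterior means recovered by InferPy lie close to these values; the TFP and Pyro versions follow below.

# Hypothetical check: posterior means should be near the values used to generate the data
print(np.round(np.asarray(w_post), 1))    # expected to be close to [[2.], [1.]]
print(np.round(np.asarray(w0_post), 1))   # expected to be close to 0.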
# Print the parameters
print(w_post, w0_post)

# Sample from the posterior
with ed.interception(set_values(w=w_post, w0=w0_post)):
    generate = log_reg(d, N, x_init=(0, 0))

with tf.Session() as sess:
    x_gen, y_gen, _ = sess.run(generate)

print(x_gen, y_gen)
# Draw the parameters from the fitted guide (a single joint sample from the variational posterior)
posterior_sample = qmodel()
w_post = posterior_sample["w"]
w0_post = posterior_sample["w0"]

print(w_post, w0_post)

# Sample from the posterior
sampler_post = pyro.condition(log_reg, data={"w0": w0_post, "w": w_post})
x_gen, y_gen = sampler_post()

print(x_gen, y_gen)
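
Finally, as a hypothetical illustration that applies to all three versions: given posterior point estimates of w and w0, the predicted probability for a new input is the sigmoid of the corresponding logit. The helper below and its input values are purely illustrative.

# Hypothetical prediction for a new observation using posterior point estimates
import numpy as np

def predict_proba(x_new, w_est, w0_est):
    # p(y = 1 | x_new) under a logistic regression with the given weights
    logit = float(w0_est) + float(np.dot(np.ravel(x_new), np.ravel(w_est)))
    return 1. / (1. + np.exp(-logit))

print(predict_proba([0.5, -1.0], [2., 1.], 0.))   # illustrative values only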