Using numpy vectorize

396 Views Asked by At

I'm trying to do some bayesian probit code using data augmentation. I can get it to work if I loop over the rows of the output matrix, but I'd like to vectorize it and do it all in one shot (presumably that's faster).

import numpy as np
from numpy import random
import statsmodels.api as sm
from scipy import stats
from scipy.stats import norm, truncnorm
##################################
### Create some simulated data ###
num_leg = 50
num_bills = 20

a = np.random.uniform(-1,1,num_bills).reshape(num_bills, 1)
b = np.random.uniform(-2,2,num_bills).reshape(num_bills, 1)
x = np.random.standard_normal(num_leg).reshape(num_leg, 1)
ystar_base = a + np.dot(b,x.T)
epsilon = np.random.standard_normal(num_leg * num_bills).reshape(num_bills, num_leg)
ystar = ystar_base + epsilon
y = 1*(ystar >0)

### Initialize some stuff I need ###
avec = [0]*num_bills # These are bill parameters
bvec = [0]*num_bills
betavec = [np.matrix(zip(avec,bvec))]
xvec = [0]*num_leg # these are legislator parameters 
_ones = np.ones(num_leg)

def init_y(mat): # initialize a latent y matrix
    if mat==1: return truncnorm.rvs(0,10000)
    else: return truncnorm.rvs(-10000,0)

vectorize_y = np.vectorize(init_y)
latent_y = np.matrix(vectorize_y(y))

burn = 500 # How long to run the MCMC
runs = 500

### define the functions ###
def sample_params(xnow,ynow): # This is the function I'd like to vectorize
    if type(xnow) == list:
        xnow = np.array(xnow)
    if type(ynow) == list:
        ynow = np.array(ynow)

    ynow = ynow.T  #reshape(ynow.shape[0],1)
    sigma = np.linalg.inv(np.dot(xnow.T,xnow)) ###This is the line that produces an error###
    xy = np.dot(xnow.T,ynow)
    mu = np.dot(sigma, xy) # this is just (x'x)inv x'y
    return np.random.multivariate_normal(np.array(mu).flatten(), sigma)

vecparams = np.vectorize(sample_params)

def get_mu(xnow, bnow): # getting the updated mean to draw the latent ys
    if type(xnow) == list:
        xnow = np.array(xnow)
    if type(bnow) == list:
        bnow = np.array(bnow)
    mu = np.dot(xnow,bnow.T)
    mu = np.matrix(mu)
    return mu

def sample_y(mu, ynow): # generate latent y matrix
    if ynow==1: 
        a, b = (0 - mu),(10000-mu)
    else: 
        a, b = (-10000 - mu),(0-mu)
    return truncnorm.rvs(a,b)

vector_sample = np.vectorize(sample_y) # I'd like to be able to do something like this

### Here's the MCMC loop with the internal loop over rows(bills)
for i in range(burn+runs):
    this_beta = []
    this_x = []
    this_y = []
    for j in range(num_bills): #I'd like to get rid of this loop
        ex = zip(x_ones, x)
        newbeta = sample_params(ex, latent_y[j])
        this_beta.append(newbeta)
    #ex = np.array(zip(x_ones, x))
    #this_beta = vecparams(ex, latent_y[:,]) # and call the vectorized function here
    betavec.append(this_beta)

    #Note, I can vectorize the latent outputs easily enough here
    mean = get_mu(ex, betavec[-1])
    latent_y = np.matrix(vector_sample(mean, np.matrix(y).T).T.reshape(latent_y.shape[0], latent_y.shape[1]))

### Now a bit of code to check to see if I've recovered what I want ###
test_beta = [zip(*(z)) for z in betavec[burn:]]
test_a = np.array([z[0] for z in test_beta])
test_b = np.array([z[1] for z in test_beta])

amean = test_a.sum(axis = 0)/float(runs)
bmean = test_b.sum(axis = 0)/float(runs)

print 'a mean'
print np.corrcoef([amean, np.array(a)])
print
print 'b mean'
print np.corrcoef([bmean, np.array(b)])

If I comment out the loop and use the commented out lines just above, I get the following error at the line I indicated earlier (the one that defines sigma):

LinAlgError: 0-dimensional array given. Array must be at least two-dimensional
0

There are 0 best solutions below