"""Bayesian regression. A class the implements the Bayesian Regression. """ import operator as op from functools import reduce import copy import collections import numpy as np from scipy.stats import invgamma from scipy.stats import multivariate_normal class BayesianLinearRegression: def __init__(self, percent=95, l2=True, prior=None): if prior is not None: raise NameError("Currently only support uninformative prior, set to None plz.") self.percent = percent self.l2 = l2 def fit(self, xtrain, ytrain, sample_weight, compute_creds=True): """ Fit the bayesian linear regression. Arguments: xtrain: the training data ytrain: the training labels sample_weight: the weights for fitting the regression """ # store weights weights = sample_weight # add intercept xtrain = np.concatenate((np.ones(xtrain.shape[0])[:,None], xtrain), axis=1) diag_pi_z = np.zeros((len(weights), len(weights))) np.fill_diagonal(diag_pi_z, weights) if self.l2: V_Phi = np.linalg.inv(xtrain.transpose().dot(diag_pi_z).dot(xtrain) \ + np.eye(xtrain.shape[1])) else: V_Phi = np.linalg.inv(xtrain.transpose().dot(diag_pi_z).dot(xtrain)) Phi_hat = V_Phi.dot(xtrain.transpose()).dot(diag_pi_z).dot(ytrain) N = xtrain.shape[0] Y_m_Phi_hat = ytrain - xtrain.dot(Phi_hat) s_2 = (1.0 / N) * (Y_m_Phi_hat.dot(diag_pi_z).dot(Y_m_Phi_hat) \ + Phi_hat.transpose().dot(Phi_hat)) self.score = s_2 self.s_2 = s_2 self.N = N self.V_Phi = V_Phi self.Phi_hat = Phi_hat self.coef_ = Phi_hat[1:] self.intercept_ = Phi_hat[0] self.weights = weights if compute_creds: self.creds = self.get_creds(percent=self.percent) else: self.creds = "NA" self.crit_params = { "s_2": self.s_2, "N": self.N, "V_Phi": self.V_Phi, "Phi_hat": self.Phi_hat, "creds": self.creds } return self def predict(self, data): """ The predictive distribution. Arguments: data: The data to predict """ q_1 = np.eye(data.shape[0]) data_ones = np.concatenate((np.ones(data.shape[0])[:,None], data), axis=1) # Get response response = np.matmul(data, self.coef_) response += self.intercept_ # Compute var temp = np.matmul(data_ones, self.V_Phi) mat = np.matmul(temp, data_ones.transpose()) var = self.s_2 * (q_1 + mat) diag = np.diagonal(var) return response, np.sqrt(diag) def get_ptg(self, desired_width): """ Compute the ptg perturbations. """ cert = (desired_width / 1.96) ** 2 S = self.coef_.shape[0] * self.s_2 T = np.mean(self.weights) return 4 * S / (self.coef_.shape[0] * T * cert) def get_creds(self, percent=95, n_samples=10_000, get_intercept=False): """ Get the credible intervals. Arguments: percent: the percent cutoff for the credible interval, i.e., 95 is 95% credible interval n_samples: the number of samples to compute the credible interval get_intercept: whether to include the intercept in the credible interval """ samples = self.draw_posterior_samples(n_samples, get_intercept=get_intercept) creds = np.percentile(np.abs(samples - (self.Phi_hat if get_intercept else self.coef_)), percent, axis=0) return creds def draw_posterior_samples(self, num_samples, get_intercept=False): """ Sample from the posterior. Arguments: num_samples: number of samples to draw from the posterior get_intercept: whether to include the intercept """ sigma_2 = invgamma.rvs(self.N / 2, scale=(self.N * self.s_2) / 2, size=num_samples) phi_samples = [] for sig in sigma_2: sample = multivariate_normal.rvs(mean=self.Phi_hat, cov=self.V_Phi * sig, size=1) phi_samples.append(sample) phi_samples = np.vstack(phi_samples) if get_intercept: return phi_samples else: return phi_samples[:, 1:]