From a9ec76a470c0a0666f5420fa399e2549f7d31b46 Mon Sep 17 00:00:00 2001
From: Erick Vega <efvega@umich.edu>
Date: Sun, 27 Mar 2022 13:27:38 -0400
Subject: [PATCH] Adds a file for working with multivariate t distribution

---
 src/distribution_tools/multivariatet.py | 271 ++++++++++++++++++++++++
 1 file changed, 271 insertions(+)
 create mode 100644 src/distribution_tools/multivariatet.py

diff --git a/src/distribution_tools/multivariatet.py b/src/distribution_tools/multivariatet.py
new file mode 100644
index 0000000..8551d99
--- /dev/null
+++ b/src/distribution_tools/multivariatet.py
@@ -0,0 +1,271 @@
+"""Multivariate t-distribution.
+
+Author: Gregory Gundersen 2020. Architecture based on SciPy's
+`_multivariate.py` module by Joris Vankerschaver 2013.
+"""
+
+import numpy as np
+from   scipy._lib._util import check_random_state
+from   scipy.stats._multivariate import _PSD, multi_rv_generic, multi_rv_frozen
+from   scipy.special import gammaln
+
+
+# -----------------------------------------------------------------------------
+
+class multivariate_t_gen(multi_rv_generic):
+
+    def __init__(self, seed=None):
+        """Initialize a multivariate t-distributed random variable.
+
+        Parameters
+        ----------
+        seed : Random state.
+        """
+        self._random_state = check_random_state(seed)
+
+    def __call__(self, mean=None, shape=1, df=1, seed=None):
+        """Create a frozen multivariate t-distribution. See
+        `multivariate_t_frozen` for parameters.
+        """
+        return multivariate_t_frozen(mean=mean, shape=shape, df=df, seed=seed)
+
+    def pdf(self, x, mean=None, shape=1, df=1):
+        """Multivariate t-distribution probability density function.
+
+        Parameters
+        ----------
+        x : array_like
+            Points at which to evaluate the log of the probability density
+            function.
+        mean : array_like, optional
+            Mean of the distribution (default zero).
+        shape : array_like, optional
+            Positive definite shape matrix. This is not the distribution's
+            covariance matrix (default one).
+        df : Degrees of freedom.
+
+        Returns
+        -------
+        logpdf : Probability density function evaluated at `x`.
+
+        Examples
+        --------
+        FIXME.
+        """
+        dim, mean, shape, df = self._process_parameters(mean, shape, df)
+        x = self._process_quantiles(x, dim)
+        shape_info = _PSD(shape)
+        lp = self._logpdf(x, mean, shape_info.U, shape_info.log_pdet, df, dim)
+        return np.exp(lp)
+
+    def logpdf(self, x, mean=None, shape=1, df=1):
+        """Log of the multivariate t-distribution probability density function.
+
+        Parameters
+        ----------
+        x : array_like
+            Points at which to evaluate the log of the probability density
+            function.
+        mean : array_like, optional
+            Mean of the distribution (default zero).
+        shape : array_like, optional
+            Positive definite shape matrix. This is not the distribution's
+            covariance matrix (default one).
+        df : Degrees of freedom.
+
+        Returns
+        -------
+        logpdf : Log of the probability density function evaluated at `x`.
+
+        Examples
+        --------
+        FIXME.
+        """
+        dim, mean, shape, df = self._process_parameters(mean, shape, df)
+        x = self._process_quantiles(x, dim)
+        shape_info = _PSD(shape)
+        return self._logpdf(x, mean, shape_info.U, shape_info.log_pdet, df, dim)
+
+    def _logpdf(self, x, mean, U, log_pdet, df, dim):
+        """Utility method `pdf`, `logpdf` for parameters.
+        """
+        dev  = x - mean
+        maha = np.square(np.dot(dev, U)).sum(axis=-1)
+
+        t = 0.5 * (df + dim)
+        A = gammaln(t)
+        B = gammaln(0.5 * df)
+        C = dim/2. * np.log(df * np.pi)
+        D = 0.5 * log_pdet
+        E = -t * np.log(1 + (1./df) * maha)
+
+        return A - B - C - D + E
+
+    def rvs(self, mean=None, shape=1, df=1, size=1, random_state=None):
+        """Draw random samples from a multivariate t-distribution.
+
+        Parameters
+        ----------
+        x : array_like
+            Points at which to evaluate the log of the probability density
+            function.
+        mean : array_like, optional
+            Mean of the distribution (default zero).
+        shape : array_like, optional
+            Positive definite shape matrix. This is not the distribution's
+            covariance matrix (default one).
+        df : Degrees of freedom.
+
+        Returns
+        -------
+
+        Examples
+        --------
+        FIXME.
+        """
+        dim, mean, shape, df = self._process_parameters(mean, shape, df)
+        if random_state is not None:
+            rng = check_random_state(random_state)
+        else:
+            rng = self._random_state
+
+        if df == np.inf:
+            x = np.ones(size)
+        else:
+            x = rng.chisquare(df, size=size) / df
+
+        z = rng.multivariate_normal(np.zeros(dim), shape, size=size)
+        samples = mean + z / np.sqrt(x)[:, None]
+        return samples
+
+    def _process_quantiles(self, x, dim):
+        """Adjust quantiles array so that last axis labels the components of
+        each data point.
+        """
+        x = np.asarray(x, dtype=float)
+        if x.ndim == 0:
+            x = x[np.newaxis]
+        elif x.ndim == 1:
+            if dim == 1:
+                x = x[:, np.newaxis]
+            else:
+                x = x[np.newaxis, :]
+        return x
+
+    def _process_parameters(self, mean, shape, df):
+        """Infer dimensionality from mean array and shape matrix, handle
+        defaults, and ensure compatible dimensions.
+        """
+        if mean is None and shape is None:
+            shape = np.asarray(1, dtype=float)
+            dim = 1
+        elif mean is None:
+            shape = np.asarray(shape, dtype=float)
+            if shape.ndim < 2:
+                dim = 1
+            else:
+                dim = shape.shape[0]
+            mean = np.zeros(dim)
+        else:
+            shape = np.asarray(shape, dtype=float)
+            mean = np.asarray(mean, dtype=float)
+            dim = mean.size
+
+        # FIXME: Why is this here?
+        if dim == 1:
+            mean.shape = (1,)
+            shape.shape = (1, 1)
+
+        if mean.ndim != 1 or mean.shape[0] != dim:
+            raise ValueError("Array 'mean' must be a vector of length %d." %
+                             dim)
+        if shape.ndim == 0:
+            shape = shape * np.eye(dim)
+        elif shape.ndim == 1:
+            shape = np.diag(shape)
+        elif shape.ndim == 2 and shape.shape != (dim, dim):
+            rows, cols = shape.shape
+            if rows != cols:
+                msg = ("Array 'cov' must be square if it is two dimensional,"
+                       " but cov.shape = %s." % str(shape.shape))
+            else:
+                msg = ("Dimension mismatch: array 'cov' is of shape %s,"
+                       " but 'mean' is a vector of length %d.")
+                msg = msg % (str(shape.shape), len(mean))
+            raise ValueError(msg)
+        elif shape.ndim > 2:
+            raise ValueError("Array 'cov' must be at most two-dimensional,"
+                             " but cov.ndim = %d" % shape.ndim)
+
+        # Process degrees of freedom.
+        if df is None:
+            df = 1
+        if not isinstance(df, int) and not np.isinf(df):
+            raise ValueError("'df' must be an integer or 'np.inf' but is of "
+                             "type %s" % type(df))
+
+        return dim, mean, shape, df
+
+
+class multivariate_t_frozen(multi_rv_frozen):
+
+    def __init__(self, mean=None, shape=1, df=1, seed=None):
+        """
+        Create a frozen multivariate normal distribution.
+
+        Parameters
+        ----------
+        x : array_like
+            Points at which to evaluate the log of the probability density
+            function.
+        mean : array_like, optional
+            Mean of the distribution (default zero).
+        shape : array_like, optional
+            Positive definite shape matrix. This is not the distribution's
+            covariance matrix (default one).
+        df : Degrees of freedom.
+
+        Examples
+        --------
+        FIXME.
+        """
+        self._dist = multivariate_t_gen(seed)
+        dim, mean, shape, df = self._dist._process_parameters(mean, shape, df)
+        self.dim, self.mean, self.shape, self.df = dim, mean, shape, df
+        self.shape_info = _PSD(shape)
+
+    def logpdf(self, x):
+        x = self._dist._process_quantiles(x, self.dim)
+        U = self.shape_info.U
+        log_pdet = self.shape_info.log_pdet
+        return self._dist._logpdf(x, self.mean, U, log_pdet, self.df, self.dim)
+
+    def pdf(self, x):
+        return np.exp(self.logpdf(x))
+
+    def rvs(self, size=1, random_state=None):
+        """
+        Draw random samples from a multivariate normal distribution.
+
+        Parameters
+        ----------
+        size : integer, optional
+            Number of samples to draw (default 1).
+        random_state : np.random.RandomState instance
+            RandomState used for drawing the random variates.
+
+        Returns
+        -------
+        rvs : ndarray or scalar
+            Random variates of size (`size`, `N`), where `N` is the
+            dimension of the random variable.
+        """
+        return self._dist.rvs(mean=self.mean,
+                              shape=self.shape,
+                              df=self.df,
+                              size=size,
+                              random_state=random_state)
+
+# -----------------------------------------------------------------------------
+
+multivariate_t = multivariate_t_gen()
-- 
GitLab