5 years ago · 0d0083d380
--- a/labels/crowd_labeling/CLLDA.py
+++ b/labels/crowd_labeling/CLLDA.py
@@ -0,0 +1,525 @@
 
				+import numpy as np
			
 
				+try:
			
 
				+    from scipy.stats import mode
			
 
				+except ImportError:
			
 
				+    def mode(a, axis=0):
			
 
				+        scores = np.unique(np.ravel(a))  # get ALL unique values
			
 
				+        testshape = list(a.shape)
			
 
				+        testshape[axis] = 1
			
 
				+        oldmostfreq = np.zeros(testshape)
			
 
				+        oldcounts = np.zeros(testshape)
			
 
				+
			
 
				+        for score in scores:
			
 
				+            template = (a == score)
			
 
				+            counts = np.expand_dims(np.sum(template, axis), axis)
			
 
				+            mostfrequent = np.where(counts > oldcounts, score, oldmostfreq)
			
 
				+            oldcounts = np.maximum(counts, oldcounts)
			
 
				+            oldmostfreq = mostfrequent
			
 
				+
			
 
				+        return mostfrequent, oldcounts
			
 
				+import multiprocessing as mp
			
 
				+from time import time
			
 
				+from copy import deepcopy
			
 
				+from crowd_labeling.logratio_transformations import \
			
 
				+    centered_log_ratio_transform as clr, \
			
 
				+    isometric_log_ratio_transform as ilr, \
			
 
				+    additive_log_ratio_transform as alr, \
			
 
				+    make_projection_matrix as mpm
			
 
				+
			
 
				+
			
 
				+class CLLDA:
			
 
				+    """
			
 
				+    The :class: CLLDA is a python implementation of Crowd Labeling Latent Dirichlet Allocation.
			
 
				+
			
 
				+    This algorithm processes crowd labeling (aka crowd consensus) data where workers label instances
			
 
				+    as pertaining to one or more classes. Allows for calculating resulting label estimates and
			
 
				+    covariances in multiple log-ratio transformed spaces.
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self, votes, workers, instances, vote_ids=None, worker_ids=None, instance_ids=None,
			
 
				+                 worker_prior=None, instance_prior=None, transform=None,
			
 
				+                 num_epochs=1000, burn_in=200, updateable=True, save_samples=False, seed=None):
			
 
				+
			
 
				+        """
			
 
				+        Initializes settings for the model and automatically calls the inference function.
			
 
				+
			
 
				+        :param votes: List of vote values.
			
 
				+        :param workers: List of uses who submitted :param votes.
			
 
				+        :param instances: List of instances to which the :param votes pertain.
			
 
				+        :param vote_ids: (optional) List of vote ids. If provided, :param votes should be a list of integers.
			
 
				+        :param instance_ids: (optional) List of instance ids. If provided, :param instances should be a list of integers.
			
 
				+        :param worker_ids: (optional) List of worker ids. If provided, :param workers should be a list of integers.
			
 
				+        :param worker_prior: (optional) Matrix prior for worker skill (pseudovotes).
			
 
				+        :param instance_prior: (optional) List of class priors (pseudovotes)
			
 
				+        :param transform: log-ratio transform to use.
			
 
				+        :param num_epochs: number of epochs to run for.
			
 
				+        :param burn_in: number of epochs to ignore for convergence of the Gibbs chain.
			
 
				+        :param updateable: If True, will save vote-classes between runs at the expense of memory.
			
 
				+        :param save_samples: option to save vote-classes after each epoch (very memory intensive).
			
 
				+        :param seed: seed for the random number generator if reproducibility is desired.
			
 
				+        """
			
 
				+
			
 
				+        # set random seed
			
 
				+        if seed is None:
			
 
				+            seed = np.random.randint(int(1e8))
			
 
				+        self.rng = np.random.RandomState(seed=seed)
			
 
				+
			
 
				+        # data info and priors
			
 
				+        self.V = len(votes)
			
 
				+        self.U = len(np.unique(workers))
			
 
				+        self.I = len(np.unique(instances))
			
 
				+        if worker_prior is not None:
			
 
				+            self.worker_prior = np.array(worker_prior)
			
 
				+            if self.worker_prior.ndim == 2:
			
 
				+                self.worker_prior = np.tile(self.worker_prior[np.newaxis, :, :], [self.U, 1, 1])
			
 
				+            self.C = self.worker_prior.shape[1]
			
 
				+            self.R = self.worker_prior.shape[2]
			
 
				+        else:
			
 
				+            if vote_ids is not None:
			
 
				+                self.C = len(vote_ids)
			
 
				+                self.R = self.C
			
 
				+            else:
			
 
				+                self.R = len(np.unique(votes))
			
 
				+                self.C = self.R
			
 
				+            self.worker_prior = (np.eye(self.R) + np.ones((self.R, self.R)) / self.R) * 3
			
 
				+            self.worker_prior = np.tile(self.worker_prior[np.newaxis, :, :], [self.U, 1, 1])
			
 
				+        if instance_prior is None:
			
 
				+            self.instance_prior = np.ones(self.C) / self.C / 4
			
 
				+        else:
			
 
				+            self.instance_prior = instance_prior
			
 
				+
			
 
				+        # determine vote IDs
			
 
				+        if vote_ids is None:
			
 
				+            self.vote_ids = np.unique(votes)
			
 
				+            vote_dict = {y: x for x, y in enumerate(self.vote_ids)}
			
 
				+            votes = np.array([vote_dict[x] for x in votes])
			
 
				+        else:
			
 
				+            self.vote_ids = vote_ids
			
 
				+        # determine instance IDs
			
 
				+        if instance_ids is None:
			
 
				+            self.instance_ids = np.unique(instances)
			
 
				+            instance_dict = {y: x for x, y in enumerate(self.instance_ids)}
			
 
				+            instances = np.array([instance_dict[x] for x in instances])
			
 
				+        else:
			
 
				+            self.instance_ids = instance_ids
			
 
				+        # determine worker IDs
			
 
				+        if worker_ids is None:
			
 
				+            self.worker_ids = np.unique(workers)
			
 
				+            worker_dict = {y: x for x, y in enumerate(self.worker_ids)}
			
 
				+            workers = np.array([worker_dict[x] for x in workers])
			
 
				+        else:
			
 
				+            self.worker_ids = worker_ids
			
 
				+
			
 
				+        # cl_transform info
			
 
				+        if not isinstance(transform, str) and hasattr(transform, '__iter__'):
			
 
				+            self.transform = tuple(transform)
			
 
				+        else:
			
 
				+            self.transform = (transform,)
			
 
				+
			
 
				+        # Gibbs sampling parameters
			
 
				+        self.num_epochs = num_epochs
			
 
				+        self.burn_in = burn_in
			
 
				+        self.num_samples = num_epochs - burn_in
			
 
				+
			
 
				+        # info to save
			
 
				+        self.LL = np.nan * np.ones(self.num_epochs)
			
 
				+        self.worker_mats = np.zeros((self.U, self.C, self.R))
			
 
				+        self.labels, self.labels_cov = list(), list()
			
 
				+        for transform in self.transform:
			
 
				+            if transform in (None, 'none', 'clr'):
			
 
				+                self.labels.append(np.zeros((self.I, self.C)))
			
 
				+                self.labels_cov.append(np.zeros((self.I, self.C, self.C)))
			
 
				+            elif transform in ('alr', 'ilr'):
			
 
				+                self.labels.append(np.zeros((self.I, self.C - 1)))
			
 
				+                self.labels_cov.append(np.zeros((self.I, self.C - 1, self.C - 1)))
			
 
				+            else:
			
 
				+                raise Exception('Unknown transform!')
			
 
				+        if save_samples:
			
 
				+            self.samples = np.zeros((self.num_epochs - self.burn_in, self.I, self.C - 1))
			
 
				+        self.updateable = updateable
			
 
				+        self.vote_classes = None
			
 
				+
			
 
				+        # estimate label means and covariances using cllda
			
 
				+        self.cllda(votes, workers, instances)
			
 
				+
			
 
				+        # clean up
			
 
				+        if not self.updateable:
			
 
				+            self.vote_classes = None
			
 
				+        else:
			
 
				+            self.votes = votes
			
 
				+            self.instances = instances
			
 
				+            self.workers = workers
			
 
				+
			
 
				+    # CLLDA optimization using Gibbs sampling
			
 
				+    def cllda(self, votes, workers, instances, starting_epoch=0):
			
 
				+        """
			
 
				+        Performs inference on the :class: CLLDA model.
			
 
				+
			
 
				+        :param votes: List of vote values.
			
 
				+        :param workers: List of workers who submitted :param votes.
			
 
				+        :param instances: List of instances to which the :param votes pertain.
			
 
				+        :param starting_epoch: How many epochs have already been incorporated in the averages.
			
 
				+        """
			
 
				+
			
 
				+        # precalculate
			
 
				+        worker_prior_sum = self.worker_prior.sum(axis=2)
			
 
				+        instance_prior_sum = self.instance_prior.sum()
			
 
				+
			
 
				+        # initial estimates
			
 
				+        if self.vote_classes is None:
			
 
				+            if self.C == self.R:
			
 
				+                self.vote_classes = votes.copy()
			
 
				+            else:
			
 
				+                self.vote_classes = self.rng.randint(0, self.C, self.V)
			
 
				+
			
 
				+        # calculate vote weights
			
 
				+        temp = np.vstack((workers, instances)).T
			
 
				+        temp = np.ascontiguousarray(temp).view(np.dtype((np.void, temp.dtype.itemsize * temp.shape[1])))
			
 
				+        _, unique_counts = np.unique(temp, return_counts=True)
			
 
				+        weights = 1. / unique_counts[instances]  # type: np.ndarray
			
 
				+
			
 
				+        # initial counts
			
 
				+        counts_across_images = np.zeros(shape=(self.U, self.C, self.R))
			
 
				+        counts_across_workers_and_votes = np.zeros(shape=(self.I, self.C))
			
 
				+        for it_v in range(self.V):
			
 
				+            counts_across_images[workers[it_v], self.vote_classes[it_v], votes[it_v]] += weights[it_v]
			
 
				+            counts_across_workers_and_votes[instances[it_v], self.vote_classes[it_v]] += weights[it_v]
			
 
				+        counts_across_images_and_votes = counts_across_images.sum(axis=2)
			
 
				+
			
 
				+        # set cl_transform
			
 
				+        transform = list()
			
 
				+        for tfm in self.transform:
			
 
				+            if tfm in (None, 'none'):
			
 
				+                transform.append(self.identity)
			
 
				+            elif tfm == 'clr':
			
 
				+                transform.append(clr)
			
 
				+            elif tfm == 'alr':
			
 
				+                transform.append(alr)
			
 
				+            elif tfm == 'ilr':
			
 
				+                transform.append(lambda comp: ilr(comp, mpm(self.C)))
			
 
				+
			
 
				+        # LDA functions
			
 
				+        def get_data_like():
			
 
				+            like = np.zeros(self.V)
			
 
				+            for it_v in range(self.V):
			
 
				+                i = instances[it_v]
			
 
				+                k = self.vote_classes[it_v]
			
 
				+                u = workers[it_v]
			
 
				+                v = votes[it_v]
			
 
				+                w = weights[it_v]  # type: np.ndarray
			
 
				+                like[it_v] = (counts_across_workers_and_votes[i, k] - w + self.instance_prior[k]) \
			
 
				+                             * (counts_across_images[u, k, v] - w + self.worker_prior[u, k, v]) \
			
 
				+                             / (counts_across_images_and_votes[u, k] - w + worker_prior_sum[u, k])
			
 
				+            return np.log(like).sum()
			
 
				+
			
 
				+        def get_label_prob():
			
 
				+            like = (counts_across_workers_and_votes[i, :] + self.instance_prior[:]) \
			
 
				+                   * (counts_across_images[u, :, v] + self.worker_prior[u, :, v]) \
			
 
				+                   / (counts_across_images_and_votes[u, :] + worker_prior_sum[u, :])
			
 
				+            return like / like.sum()
			
 
				+
			
 
				+        def update_labels():
			
 
				+            # create update
			
 
				+            numerator = counts_across_workers_and_votes + self.instance_prior
			
 
				+            denominator = counts_across_workers_and_votes.sum(axis=1) + instance_prior_sum
			
 
				+            update = numerator / denominator[:, np.newaxis]
			
 
				+            for it, tfm in enumerate(transform):
			
 
				+                tfmupdate = tfm(update)
			
 
				+                if hasattr(self, 'samples'):
			
 
				+                    self.samples[ep - self.burn_in, :, :] = tfmupdate
			
 
				+                # update labels
			
 
				+                delta = (tfmupdate - self.labels[it]) / (ep - self.burn_in + 1)
			
 
				+                self.labels[it] += delta
			
 
				+                # update labels_M2
			
 
				+                delta_cov = delta[:, :, np.newaxis] * delta[:, :, np.newaxis].transpose(0, 2, 1)
			
 
				+                self.labels_cov[it] += (ep - self.burn_in) * delta_cov - self.labels_cov[it] / (ep - self.burn_in + 1)
			
 
				+
			
 
				+        def update_worker_mats():
			
 
				+            # create update
			
 
				+            numerator = counts_across_images + self.worker_prior
			
 
				+            denominator = counts_across_images.sum(axis=2) + worker_prior_sum
			
 
				+            update = numerator / denominator[:, :, np.newaxis]
			
 
				+            # update labels
			
 
				+            delta = (update - self.worker_mats) / (ep - self.burn_in + 1)
			
 
				+            self.worker_mats += delta
			
 
				+
			
 
				+        # CLLDA
			
 
				+        start = time()
			
 
				+        for ep in range(starting_epoch, starting_epoch + self.num_epochs):
			
 
				+            # begin epoch
			
 
				+            print('starting epoch ' + str(ep + 1))
			
 
				+            if ep > starting_epoch:
			
 
				+                time_to_go = (time() - start) * (self.num_epochs - ep) / ep
			
 
				+                if time_to_go >= 3600:
			
 
				+                    print('Estimated time to finish: %.2f hours' % (time_to_go / 3600,))
			
 
				+                elif time_to_go >= 60:
			
 
				+                    print('Estimated time to finish: %.1f minutes' % (time_to_go / 60,))
			
 
				+                else:
			
 
				+                    print('Estimated time to finish: %.1f seconds' % (time_to_go,))
			
 
				+            ep_start = time()
			
 
				+
			
 
				+            # gibbs sampling
			
 
				+            for it_v in self.rng.permutation(self.V).astype(np.int64):
			
 
				+                # get correct indices
			
 
				+                i = instances[it_v]
			
 
				+                k = self.vote_classes[it_v]
			
 
				+                u = workers[it_v]
			
 
				+                v = votes[it_v]
			
 
				+                w = weights[it_v]
			
 
				+                # decrement counts
			
 
				+                counts_across_images[u, k, v] -= w
			
 
				+                counts_across_workers_and_votes[i, k] -= w
			
 
				+                counts_across_images_and_votes[u, k] -= w
			
 
				+                # calculate probabilities of labels for this vote
			
 
				+                probs = get_label_prob()
			
 
				+                # sample new label
			
 
				+                k = self.rng.multinomial(1, probs).argmax()
			
 
				+                self.vote_classes[it_v] = k
			
 
				+                # increment counts
			
 
				+                counts_across_images[u, k, v] += w
			
 
				+                counts_across_workers_and_votes[i, k] += w
			
 
				+                counts_across_images_and_votes[u, k] += w
			
 
				+
			
 
				+            # save information
			
 
				+            self.LL[ep] = get_data_like()
			
 
				+            if ep >= self.burn_in + starting_epoch:
			
 
				+                update_labels()
			
 
				+                update_worker_mats()
			
 
				+
			
 
				+            # print epoch LL and duration
			
 
				+            print('Epoch completed in %.1f seconds' % (time() - ep_start,))
			
 
				+            print('LL: %.6f' % (self.LL[ep]))
			
 
				+
			
 
				+        # adjust label covariances
			
 
				+        self.labels_cov = [x * self.num_samples / (self.num_samples - 1.) for x in self.labels_cov]
			
 
				+
			
 
				+        time_total = time() - start
			
 
				+        if time_total >= 3600:
			
 
				+            print('CLLDA completed in %.2f hours' % (time_total / 3600,))
			
 
				+        elif time_total >= 60:
			
 
				+            print('CLLDA completed in %.1f minutes' % (time_total / 60,))
			
 
				+        else:
			
 
				+            print('CLLDA completed in %.1f seconds' % (time_total,))
			
 
				+
			
 
				+    #
			
 
				+    def update(self, votes, workers, instances, vote_ids=None, instance_ids=None, worker_ids=None, worker_prior=None,
			
 
				+               num_epochs=1000, burn_in=200):
			
 
				+
			
 
				+        # check that this is updateble
			
 
				+        assert self.updateable, 'This model is not updateable, presumable to conserve memory.'
			
 
				+
			
 
				+        # determine IDs
			
 
				+        # for votes
			
 
				+        old_vote_ids = self.vote_ids.copy()  # type: np.ndarray
			
 
				+        if vote_ids is None:
			
 
				+            self.vote_ids = np.unique(votes)
			
 
				+            vote_dict = {y: x for x, y in enumerate(self.vote_ids)}
			
 
				+            votes = np.array([vote_dict[x] for x in votes])
			
 
				+        else:
			
 
				+            self.vote_ids = vote_ids
			
 
				+        # for instances
			
 
				+        old_instance_ids = self.instance_ids.copy()  # type: np.ndarray
			
 
				+        if instance_ids is None:
			
 
				+            self.instance_ids = np.unique(instances)
			
 
				+            instance_dict = {y: x for x, y in enumerate(self.instance_ids)}
			
 
				+            instances = np.array([instance_dict[x] for x in instances])
			
 
				+        else:
			
 
				+            self.instance_ids = instance_ids
			
 
				+        # for workers
			
 
				+        old_worker_ids = self.worker_ids.copy()  # type: np.ndarray
			
 
				+        if worker_ids is None:
			
 
				+            self.worker_ids = np.unique(workers)
			
 
				+            worker_dict = {y: x for x, y in enumerate(self.worker_ids)}
			
 
				+            workers = np.array([worker_dict[x] for x in workers])
			
 
				+        else:
			
 
				+            self.worker_ids = worker_ids
			
 
				+
			
 
				+        # update parameters
			
 
				+        self.V = len(votes)
			
 
				+        self.U = len(np.unique(workers))
			
 
				+        self.I = len(np.unique(instances))
			
 
				+        self.num_epochs = num_epochs
			
 
				+        self.burn_in = burn_in
			
 
				+
			
 
				+        # add more samples to previous solution
			
 
				+        if np.array_equal(votes, self.votes) and np.array_equal(workers, self.workers) \
			
 
				+                and np.array_equal(instances, self.instances) and np.array_equal(self.vote_ids, old_vote_ids) \
			
 
				+                and np.array_equal(self.instance_ids, old_instance_ids) \
			
 
				+                and np.array_equal(self.worker_ids, old_worker_ids):
			
 
				+            # adjust label covariances
			
 
				+            self.labels_cov = [x * (self.num_samples - 1.) / self.num_samples for x in self.labels_cov]
			
 
				+
			
 
				+            # update parameters
			
 
				+            self.LL = np.concatenate((self.LL, np.zeros(num_epochs)))
			
 
				+            old_num_samples = self.num_samples
			
 
				+            self.num_samples += num_epochs - burn_in
			
 
				+            self.votes = votes
			
 
				+            self.workers = workers
			
 
				+            self.instances = instances
			
 
				+
			
 
				+            # update cllda
			
 
				+            self.cllda(votes, workers, instances, old_num_samples - 1)
			
 
				+
			
 
				+        # keep only vote-classes and build off of them
			
 
				+        else:
			
 
				+            # insert old vote-classes and initialize new vote-classes
			
 
				+            old_vote_classes = self.vote_classes.copy()
			
 
				+            self.vote_classes = np.zeros_like(votes)
			
 
				+            old_dict = {y: x for x, y in enumerate(zip(self.votes, self.workers, self.instances))}
			
 
				+            for it, index in enumerate(zip(votes, workers, instances)):
			
 
				+                try:
			
 
				+                    self.vote_classes[it] = old_vote_classes[old_dict[index]]
			
 
				+                except KeyError:
			
 
				+                    if self.C == self.R:
			
 
				+                        self.vote_classes[it] = votes[it]
			
 
				+                    else:
			
 
				+                        self.vote_classes[it] = self.rng.randint(self.C)
			
 
				+
			
 
				+            # adjust worker_prior if necessary
			
 
				+            if not np.array_equal(self.worker_ids, old_worker_ids):
			
 
				+                assert worker_prior is not None, "Worker priors must be provided if worker_ids change."
			
 
				+                self.worker_prior = np.array(worker_prior)
			
 
				+                if self.worker_prior.ndim == 2:
			
 
				+                    self.worker_prior = np.tile(self.worker_prior[np.newaxis, :, :], [self.U, 1, 1])
			
 
				+
			
 
				+            # adjust info to save
			
 
				+            self.worker_mats = np.zeros((self.U, self.C, self.R))
			
 
				+            self.labels, self.labels_cov = list(), list()
			
 
				+            for transform in self.transform:
			
 
				+                if transform in (None, 'none', 'clr'):
			
 
				+                    self.labels.append(np.zeros((self.I, self.C)))
			
 
				+                    self.labels_cov.append(np.zeros((self.I, self.C, self.C)))
			
 
				+                elif transform in ('alr', 'ilr'):
			
 
				+                    self.labels.append(np.zeros((self.I, self.C - 1)))
			
 
				+                    self.labels_cov.append(np.zeros((self.I, self.C - 1, self.C - 1)))
			
 
				+                else:
			
 
				+                    raise Exception('Unknown transform!')
			
 
				+
			
 
				+            # update parameters
			
 
				+            self.LL = np.zeros(num_epochs)
			
 
				+            self.num_samples = num_epochs
			
 
				+
			
 
				+            # update cllda
			
 
				+            self.cllda(votes, workers, instances)
			
 
				+            self.votes = votes
			
 
				+            self.instances = instances
			
 
				+            self.workers = workers
			
 
				+
			
 
				+    # no cl_transform
			
 
				+    @staticmethod
			
 
				+    def identity(compositional):
			
 
				+        return compositional
			
 
				+
			
 
				+
			
 
				+def concurrent_cllda(models, votes, workers, instances, nprocs=4, **kwargs):
			
 
				+    """
			
 
				+    Effortless parallelization of multiple CLLDA models.
			
 
				+
			
 
				+    :param models: If creating new models, an integer denoting how many models to create.
			
 
				+        Otherwise, a list of existing models to update.
			
 
				+    :param votes: List of vote values.
			
 
				+    :param workers: List of uses who submitted :param votes.
			
 
				+    :param instances: List of instances to which the :param votes pertain.
			
 
				+    :param nprocs: Number of processors to use in the parallel pool.
			
 
				+    :param kwargs: Other possible inputs to either CLLDA.__init__ or CLLDA.update
			
 
				+    :return: List of new or updated CLLDA models.
			
 
				+    """
			
 
				+
			
 
				+    # open parallel pool
			
 
				+    print('Starting multiprocessing pool...')
			
 
				+    pool = mp.Pool(processes=nprocs)
			
 
				+
			
 
				+    # run CL-LDA
			
 
				+    if isinstance(models, int):
			
 
				+        print('Starting new CL-LDA models in parallel...')
			
 
				+        if 'seed' in kwargs.keys():
			
 
				+            np.random.seed(kwargs['seed'])
			
 
				+        kwargs = [deepcopy(kwargs) for x in range(models)]
			
 
				+        for it in range(models):
			
 
				+            kwargs[it]['seed'] = np.random.randint(int(1e8))
			
 
				+        out = pool.map(_new_cllda, [(votes, workers, instances, kwa) for kwa in kwargs])
			
 
				+
			
 
				+    elif hasattr(models, '__iter__'):
			
 
				+        print('Updating CL-LDA models in parallel...')
			
 
				+        out = pool.map(_update_cllda, [(model, votes, workers, instances, kwargs) for model in models])
			
 
				+
			
 
				+    else:
			
 
				+        pool.close()
			
 
				+        TypeError('Unknown type for input: models.')
			
 
				+
			
 
				+    # close parallel pool
			
 
				+    pool.close()
			
 
				+    print('Multiprocessing pool closed.')
			
 
				+
			
 
				+    return out
			
 
				+
			
 
				+
			
 
				+def combine_cllda(models):
			
 
				+    """
			
 
				+    Combine multiple CLLDA instances.
			
 
				+    :param models: List of CLLDA models trained with the same settings.
			
 
				+    :return: CLLDA model which combines the input models.
			
 
				+    """
			
 
				+
			
 
				+    # check models are equivalent
			
 
				+    assert np.equal(models[0].V, [model.V for model in models[1:]]).any(), 'Different number of votes!'
			
 
				+    assert np.equal(models[0].U, [model.U for model in models[1:]]).any(), 'Different number of workers!'
			
 
				+    assert np.equal(models[0].I, [model.I for model in models[1:]]).any(), 'Different number of instances!'
			
 
				+    assert np.equal(models[0].C, [model.C for model in models[1:]]).any(), 'Different number of classes!'
			
 
				+    assert np.equal(models[0].R, [model.R for model in models[1:]]).any(), 'Different number of responses!'
			
 
				+    assert np.equal(models[0].worker_prior,
			
 
				+                    [model.worker_prior for model in models[1:]]).any(), 'Different worker priors!'
			
 
				+    assert np.equal(models[0].instance_prior,
			
 
				+                    [model.instance_prior for model in models[1:]]).any(), 'Different instance priors!'
			
 
				+    assert np.all([models[0].transform == model.transform for model in models[1:]]), 'Different transforms!'
			
 
				+
			
 
				+    # data info
			
 
				+    out = deepcopy((models[0]))
			
 
				+
			
 
				+    # combine label estimates
			
 
				+    out.num_samples = np.sum([model.num_samples for model in models])
			
 
				+
			
 
				+    # combine worker estimates
			
 
				+    out.worker_mats = np.sum([model.worker_mats * model.num_samples for model in models],
			
 
				+                             axis=0) / out.num_samples
			
 
				+
			
 
				+    if all([x.updateable for x in models]):
			
 
				+        out.vote_classes = mode(np.stack([x.vote_classes for x in models]))[0].flatten()
			
 
				+
			
 
				+    # combine labels and label covariances
			
 
				+    for it in range(len(models[0].transform)):
			
 
				+        out.labels[it] = np.sum([model.labels[it] * model.num_samples for model in models], 0) / out.num_samples
			
 
				+        labels_corrmat = [(model.num_samples - 1.) / model.num_samples * model.labels_cov[it]
			
 
				+                          + model.labels[it][..., np.newaxis] * model.labels[it][..., np.newaxis].transpose(0, 2, 1)
			
 
				+                          for model in models]
			
 
				+        out.labels_cov[it] = np.sum([corrmat * model.num_samples for model, corrmat in zip(models, labels_corrmat)],
			
 
				+                                    0) \
			
 
				+                             / out.num_samples - out.labels[it][..., np.newaxis] * out.labels[it][
			
 
				+            ..., np.newaxis].transpose(0, 2, 1)
			
 
				+
			
 
				+        # adjust label covariances
			
 
				+        out.labels_cov[it] *= out.num_samples / (out.num_samples - 1.)
			
 
				+
			
 
				+    return out
			
 
				+
			
 
				+
			
 
				+# map function
			
 
				+def _new_cllda(inputs):
			
 
				+    return CLLDA(*inputs[:3], **inputs[3])
			
 
				+
			
 
				+
			
 
				+# map function
			
 
				+def _update_cllda(inputs):
			
 
				+    inputs[0].update(*inputs[1:4], **inputs[4])
			
 
				+    return inputs[0]
			
 
				+
			
 
				+
			
 
				+# if __name__ == '__main__':
			
 
				+#     # test suite
			
 
				+#     from DS import DS
			
 
				+#     test_data = DS.test_data()
			
 
				+#     CLLDA(test_data[0], test_data[1], test_data[2], num_epochs=10, burn_in=2, transform=('none', 'alr', 'ilr', 'clr'))
			
 
				+#     cls = concurrent_cllda(4, test_data[0], test_data[1], test_data[2],
			
 
				+#                            num_epochs=10, burn_in=2, transform=('none', 'alr', 'ilr', 'clr'))
			
 
				+#     cl = combine_cllda(cls)
			
 
				+#     a=1
			
--- a/labels/crowd_labeling/DS.py
+++ b/labels/crowd_labeling/DS.py
@@ -0,0 +1,309 @@
 
				+import numpy as np
			
 
				+from time import time
			
 
				+
			
 
				+
			
 
				+class DS:
			
 
				+
			
 
				+    def __init__(self, votes=None, workers=None, instances=None, test=None,
			
 
				+                 transform=None, classes=None, num_epochs=1000):
			
 
				+
			
 
				+        if votes is None or workers is None or instances is None:
			
 
				+            votes, workers, instances, test = self.test_data()
			
 
				+
			
 
				+        # data info
			
 
				+        self.V = len(votes)
			
 
				+        self.U = len(np.unique(workers))
			
 
				+        self.I = len(np.unique(instances))
			
 
				+        if classes is not None:
			
 
				+            self.C = len(classes)
			
 
				+        else:
			
 
				+            self.C = len(np.unique(votes))
			
 
				+        self.transform = transform
			
 
				+        self.instance_prior = np.zeros(self.C)
			
 
				+        self.eps = np.finfo(np.float64).eps
			
 
				+
			
 
				+        # EM parameters
			
 
				+        self.max_epochs = num_epochs
			
 
				+
			
 
				+        # info to save
			
 
				+        self.LL = np.nan * np.ones(self.max_epochs)
			
 
				+        if test is not None:
			
 
				+            self.accuracy = np.nan * np.ones(self.max_epochs)
			
 
				+        self.labels = np.zeros((self.I, self.C))
			
 
				+        # self.labels_cov = np.zeros((self.I, self.C, self.C))
			
 
				+        self.worker_skill = np.zeros((self.U, self.C, self.C))
			
 
				+
			
 
				+        # estimate label means and covariances using ds
			
 
				+        self.ds(votes, workers, instances, test)
			
 
				+
			
 
				+        # apply transform
			
 
				+        if transform == 'clr':
			
 
				+
			
 
				+            def clr(self):
			
 
				+                continuous = np.log(self.labels + self.eps)
			
 
				+                continuous -= continuous.mean(1, keepdims=True)
			
 
				+                return continuous
			
 
				+
			
 
				+            self.labels = clr(self)
			
 
				+
			
 
				+        elif transform == 'alr':
			
 
				+
			
 
				+            def alr(self):
			
 
				+                continuous = np.log(self.labels[:, :-1] / (self.labels[:, -1] + self.eps))
			
 
				+                return continuous
			
 
				+
			
 
				+            self.labels = alr(self)
			
 
				+
			
 
				+        elif transform == 'ilr':
			
 
				+
			
 
				+            # make projection matrix
			
 
				+            self.projectionMatrix = np.zeros((self.C, self.C - 1), dtype=np.float32)
			
 
				+            for it in range(self.C - 1):
			
 
				+                i = it + 1
			
 
				+                self.projectionMatrix[:i, it] = 1. / i
			
 
				+                self.projectionMatrix[i, it] = -1
			
 
				+                self.projectionMatrix[i + 1:, it] = 0
			
 
				+                self.projectionMatrix[:, it] *= np.sqrt(i / (i + 1.))
			
 
				+
			
 
				+            def ilr(self):
			
 
				+                continuous = np.log(self.labels + self.eps)
			
 
				+                continuous -= continuous.mean(1, keepdims=True)
			
 
				+                continuous = np.dot(continuous, self.projectionMatrix)
			
 
				+                return continuous
			
 
				+
			
 
				+            self.labels = ilr(self)
			
 
				+
			
 
				+
			
 
				+        # # adjust label covariances and inverses
			
 
				+        # self.__estimate_label_cov_with_inv_wishart()
			
 
				+        # self.labels_cov *= (self.num_samples + 1) / self.num_samples
			
 
				+
			
 
				+    # EM functions
			
 
				+    def calculate_log_like(self, worker_ind, vote_ind):
			
 
				+        # calculate log-likelihood (2.7 is DS)
			
 
				+        LL = 0
			
 
				+        for i in range(self.I):
			
 
				+            LL += np.log((self.worker_skill[worker_ind[i], :, vote_ind[i]].prod(0) * self.instance_prior).sum())
			
 
				+        LL /= self.I
			
 
				+        return LL
			
 
				+
			
 
				+    # estimate the instance classes given the current parameters (these labels are treated as missing data here for EM)
			
 
				+    def e_step(self, worker_ind, vote_ind):
			
 
				+        # estimate instance classes (2.5 is DS)
			
 
				+        for i in range(self.I):
			
 
				+            self.labels[i, :] = self.worker_skill[worker_ind[i], :, vote_ind[i]].prod(0)
			
 
				+        self.labels *= self.instance_prior[np.newaxis, :]
			
 
				+        self.labels /= self.labels.sum(1, keepdims=True) + self.eps
			
 
				+
			
 
				+    # update parameters to maximize the data likelihood
			
 
				+    def m_step(self, instance_ind):
			
 
				+        # argmax LL over class probabilities (2.4 is DS)
			
 
				+        self.instance_prior = self.labels.mean(0) + self.eps
			
 
				+        # argmax LL over worker skill (2.3 is DS)
			
 
				+        for u in range(self.U):
			
 
				+            for c in range(self.C):
			
 
				+                self.worker_skill[u, :, c] = self.labels[instance_ind[u][c], :].sum(0)
			
 
				+        self.worker_skill /= self.worker_skill.sum(2, keepdims=True) + self.eps
			
 
				+
			
 
				+    # DS optimization using EM
			
 
				+    def ds(self, votes, workers, instances, test):
			
 
				+        # precalculate indices
			
 
				+        print('Generating indices...')
			
 
				+        worker_ind, vote_ind, instance_ind = [], [], []
			
 
				+        for i in range(self.I):
			
 
				+            _instance_ind = instances == i
			
 
				+            worker_ind.append(workers[_instance_ind])
			
 
				+            vote_ind.append(votes[_instance_ind])
			
 
				+        for u in range(self.U):
			
 
				+            instance_ind.append([])
			
 
				+            _worker_ind = workers == u
			
 
				+            for c in range(self.C):
			
 
				+                _vote_ind = votes == c
			
 
				+                instance_ind[u].append(instances[np.bitwise_and(_worker_ind, _vote_ind)])
			
 
				+
			
 
				+        # DS
			
 
				+        start = time()
			
 
				+        for ep in range(self.max_epochs):
			
 
				+            # begin epoch
			
 
				+            print('starting epoch ' + str(ep + 1))
			
 
				+            if ep:
			
 
				+                time_to_go = (time() - start) * (self.max_epochs - ep) / ep
			
 
				+                if time_to_go >= 3600:
			
 
				+                    print('Estimated time to finish: %.2f hours' % (time_to_go / 3600,))
			
 
				+                elif time_to_go >= 60:
			
 
				+                    print('Estimated time to finish: %.2f minutes' % (time_to_go / 60,))
			
 
				+                else:
			
 
				+                    print('Estimated time to finish: %.1f seconds' % (time_to_go,))
			
 
				+            ep_start = time()
			
 
				+
			
 
				+            # EM
			
 
				+            print('E step...')
			
 
				+            if not ep:
			
 
				+                # initial estimates
			
 
				+                for i in range(self.I):
			
 
				+                    ind = instances == i
			
 
				+                    for c in range(self.C):
			
 
				+                        self.labels[i, c] = np.count_nonzero(votes[ind] == c)
			
 
				+                self.labels /= self.labels.sum(1, keepdims=True) + self.eps
			
 
				+                # self.labels[np.random.choice(self.I, self.I/2, replace=False), :] = 1. / self.C
			
 
				+            else:
			
 
				+                self.e_step(worker_ind, vote_ind)
			
 
				+            print('M step...')
			
 
				+            self.m_step(instance_ind)
			
 
				+
			
 
				+            # save information
			
 
				+            print('Calculating log-likelihood...')
			
 
				+            self.LL[ep] = self.calculate_log_like(worker_ind, vote_ind)
			
 
				+            print('Log-likelihood = %f' % (self.LL[ep],))
			
 
				+
			
 
				+            # evaulation if test available
			
 
				+            if test is not None:
			
 
				+                self.accuracy[ep] = (self.labels.argmax(1) == test).mean()
			
 
				+                print('Accuracy = %f' % (self.accuracy[ep],))
			
 
				+                # ce = -np.log(self.labels[range(self.I), test] + self.eps).sum()
			
 
				+                # print 'Cross Entropy = %f' % (ce,)
			
 
				+
			
 
				+            # print epoch duration
			
 
				+            print('Epoch completed in %.1f seconds' % (time() - ep_start,))
			
 
				+
			
 
				+        time_total = time() - start
			
 
				+        if time_total >= 3600:
			
 
				+            print('DS completed in %.2f hours' % (time_total / 3600,))
			
 
				+        elif time_total >= 60:
			
 
				+            print('DS completed in %.2f minutes' % (time_total / 60,))
			
 
				+        else:
			
 
				+            print('DS completed in %.1f seconds' % (time_total,))
			
 
				+
			
 
				+    # # generate covariance estimates using inverse Wishart prior
			
 
				+    # def __estimate_label_cov_with_inv_wishart(self):
			
 
				+    #     # prepare parameters
			
 
				+    #     self.inv_wishart_prior_scatter = 0.1 * np.eye(self.C - 1) * self.num_samples
			
 
				+    #     self.inv_wishart_degrees_of_freedom = self.C - 1
			
 
				+    #     scatter_matrix = self.labels_cov * self.num_samples
			
 
				+    #
			
 
				+    #     # calculate multivariate student-t covariance based on normal with known mean and inverse Wishart prior
			
 
				+    #     self.labels_cov_iwp = (self.inv_wishart_prior_scatter + scatter_matrix) \
			
 
				+    #                           / (self.inv_wishart_degrees_of_freedom + self.num_samples - (self.C - 1) - 1)
			
 
				+    #
			
 
				+    #     # calculate covariance inverses for later use
			
 
				+    #     self.labels_icov_iwp = np.linalg.inv(self.labels_cov_iwp )
			
 
				+
			
 
				+    @staticmethod
			
 
				+    def test_data():
			
 
				+        """
			
 
				+        Sample data from the Dawid & Skene (1979) paper
			
 
				+        :return: (votes, workers, instances, true_class)
			
 
				+        """
			
 
				+        # data from DS section 4
			
 
				+        data = [[[1, 1, 1], 1, 1, 1, 1],
			
 
				+                [[3, 3, 3], 4, 3, 3, 4],
			
 
				+                [[1, 1, 2], 2, 1, 2, 2],
			
 
				+                [[2, 2, 2], 3, 1, 2, 1],
			
 
				+                [[2, 2, 2], 3, 2, 2, 2],
			
 
				+                [[2, 2, 2], 3, 3, 2, 2],
			
 
				+                [[1, 2, 2], 2, 1, 1, 1],
			
 
				+                [[3, 3, 3], 3, 4, 3, 3],
			
 
				+                [[2, 2, 2], 2, 2, 2, 3],
			
 
				+                [[2, 3, 2], 2, 2, 2, 3],
			
 
				+                [[4, 4, 4], 4, 4, 4, 4],
			
 
				+                [[2, 2, 2], 3, 3, 4, 3],
			
 
				+                [[1, 1, 1], 1, 1, 1, 1],
			
 
				+                [[2, 2, 2], 3, 2, 1, 2],
			
 
				+                [[1, 2, 1], 1, 1, 1, 1],
			
 
				+                [[1, 1, 1], 2, 1, 1, 1],
			
 
				+                [[1, 1, 1], 1, 1, 1, 1],
			
 
				+                [[1, 1, 1], 1, 1, 1, 1],
			
 
				+                [[2, 2, 2], 2, 2, 2, 1],
			
 
				+                [[2, 2, 2], 1, 3, 2, 2],
			
 
				+                [[2, 2, 2], 2, 2, 2, 2],
			
 
				+                [[2, 2, 2], 2, 2, 2, 1],
			
 
				+                [[2, 2, 2], 3, 2, 2, 2],
			
 
				+                [[2, 2, 1], 2, 2, 2, 2],
			
 
				+                [[1, 1, 1], 1, 1, 1, 1],
			
 
				+                [[1, 1, 1], 1, 1, 1, 1],
			
 
				+                [[2, 3, 2], 2, 2, 2, 2],
			
 
				+                [[1, 1, 1], 1, 1, 1, 1],
			
 
				+                [[1, 1, 1], 1, 1, 1, 1],
			
 
				+                [[1, 1, 2], 1, 1, 2, 1],
			
 
				+                [[1, 1, 1], 1, 1, 1, 1],
			
 
				+                [[3, 3, 3], 3, 2, 3, 3],
			
 
				+                [[1, 1, 1], 1, 1, 1, 1],
			
 
				+                [[2, 2, 2], 2, 2, 2, 2],
			
 
				+                [[2, 2, 2], 3, 2, 3, 2],
			
 
				+                [[4, 3, 3], 4, 3, 4, 3],
			
 
				+                [[2, 2, 1], 2, 2, 3, 2],
			
 
				+                [[2, 3, 2], 3, 2, 3, 3],
			
 
				+                [[3, 3, 3], 3, 4, 3, 2],
			
 
				+                [[1, 1, 1], 1, 1, 1, 1],
			
 
				+                [[1, 1, 1], 1, 1, 1, 1],
			
 
				+                [[1, 2, 1], 2, 1, 1, 1],
			
 
				+                [[2, 3, 2], 2, 2, 2, 2],
			
 
				+                [[1, 2, 1], 1, 1, 1, 1],
			
 
				+                [[2, 2, 2], 2, 2, 2, 2]]
			
 
				+
			
 
				+        # solutions from DS section 4
			
 
				+        test = [1,
			
 
				+                4,
			
 
				+                2,
			
 
				+                2,
			
 
				+                2,
			
 
				+                2,
			
 
				+                1,
			
 
				+                3,
			
 
				+                2,
			
 
				+                2,
			
 
				+                4,
			
 
				+                3,
			
 
				+                1,
			
 
				+                2,
			
 
				+                1,
			
 
				+                1,
			
 
				+                1,
			
 
				+                1,
			
 
				+                2,
			
 
				+                2,
			
 
				+                2,
			
 
				+                2,
			
 
				+                2,
			
 
				+                2,
			
 
				+                1,
			
 
				+                1,
			
 
				+                2,
			
 
				+                1,
			
 
				+                1,
			
 
				+                1,
			
 
				+                1,
			
 
				+                3,
			
 
				+                1,
			
 
				+                2,
			
 
				+                2,
			
 
				+                4,
			
 
				+                2,
			
 
				+                3,
			
 
				+                3,
			
 
				+                1,
			
 
				+                1,
			
 
				+                1,
			
 
				+                2,
			
 
				+                1,
			
 
				+                2]
			
 
				+
			
 
				+        # cl_transform to list format
			
 
				+        votes, workers, instances = [], [], []
			
 
				+        for it_patient, patient in enumerate(data):
			
 
				+            for it_doctor, doctor in enumerate(patient):
			
 
				+                if isinstance(doctor, list):
			
 
				+                    for diagnosis in doctor:
			
 
				+                        votes.append(diagnosis-1)
			
 
				+                        workers.append(it_doctor)
			
 
				+                        instances.append(it_patient)
			
 
				+                else:
			
 
				+                    votes.append(doctor-1)
			
 
				+                    workers.append(it_doctor)
			
 
				+                    instances.append(it_patient)
			
 
				+
			
 
				+        return np.array(votes), np.array(workers), np.array(instances), np.array(test) - 1
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    ds = DS()
			
--- a/labels/crowd_labeling/MV.py
+++ b/labels/crowd_labeling/MV.py
@@ -0,0 +1,77 @@
 
				+import numpy as np
			
 
				+
			
 
				+
			
 
				+class MV:
			
 
				+
			
 
				+    def __init__(self, votes=None, workers=None, instances=None, transform=None, classes=None):
			
 
				+
			
 
				+        # data info
			
 
				+        self.V = len(votes)
			
 
				+        self.U = len(np.unique(workers))
			
 
				+        self.I = len(np.unique(instances))
			
 
				+        if classes is not None:
			
 
				+            self.C = len(classes)
			
 
				+        else:
			
 
				+            self.C = len(np.unique(votes))
			
 
				+        self.transform = transform
			
 
				+        self.eps = np.finfo(np.float64).eps
			
 
				+
			
 
				+        # info to save
			
 
				+        self.labels = np.zeros((self.I, self.C))
			
 
				+
			
 
				+        # estimate label means and covariances using ds
			
 
				+        self.mv(votes, workers, instances)
			
 
				+
			
 
				+        # apply transform
			
 
				+        if transform == 'clr':
			
 
				+
			
 
				+            def clr(self):
			
 
				+                continuous = np.log(self.labels + self.eps)
			
 
				+                continuous -= continuous.mean(1, keepdims=True)
			
 
				+                return continuous
			
 
				+
			
 
				+            self.labels = clr(self)
			
 
				+
			
 
				+        elif transform == 'alr':
			
 
				+
			
 
				+            def alr(self):
			
 
				+                continuous = np.log(self.labels[:, :-1] / (self.labels[:, -1] + self.eps))
			
 
				+                return continuous
			
 
				+
			
 
				+            self.labels = alr(self)
			
 
				+
			
 
				+        elif transform == 'ilr':
			
 
				+
			
 
				+            # make projection matrix
			
 
				+            self.projectionMatrix = np.zeros((self.C, self.C - 1), dtype=np.float32)
			
 
				+            for it in range(self.C - 1):
			
 
				+                i = it + 1
			
 
				+                self.projectionMatrix[:i, it] = 1. / i
			
 
				+                self.projectionMatrix[i, it] = -1
			
 
				+                self.projectionMatrix[i + 1:, it] = 0
			
 
				+                self.projectionMatrix[:, it] *= np.sqrt(i / (i + 1.))
			
 
				+
			
 
				+            def ilr(self):
			
 
				+                continuous = np.log(self.labels + self.eps)
			
 
				+                continuous -= continuous.mean(1, keepdims=True)
			
 
				+                continuous = np.dot(continuous, self.projectionMatrix)
			
 
				+                return continuous
			
 
				+
			
 
				+            self.labels = ilr(self)
			
 
				+
			
 
				+    # DS optimization using EM
			
 
				+    def mv(self, votes, workers, instances):
			
 
				+
			
 
				+        # vote weights
			
 
				+        temp = np.vstack((workers, instances)).T
			
 
				+        temp = np.ascontiguousarray(temp).view(np.dtype((np.void, temp.dtype.itemsize * temp.shape[1])))
			
 
				+        _, unique_counts = np.unique(temp, return_counts=True)
			
 
				+        weights = 1. / unique_counts[instances]
			
 
				+
			
 
				+        # initial estimates
			
 
				+        for i in range(self.I):
			
 
				+            ind = instances == i
			
 
				+            for c in range(self.C):
			
 
				+                self.labels[i, c] = ((votes[ind] == c) * weights[ind]).sum()
			
 
				+        self.labels /= self.labels.sum(1, keepdims=True) + self.eps
			
 
				+
			
--- a/labels/crowd_labeling/README.md
+++ b/labels/crowd_labeling/README.md
@@ -0,0 +1,13 @@
 
				+# Crowd Labeling
			
 
				+Code to perform crowd labeling.
			
 
				+
			
 
				+#### Supported languages:
			
 
				+* Python
			
 
				+* MATLAB (planned)
			
 
				+
			
 
				+#### Supported algorithms:
			
 
				+* Majority Vote
			
 
				+* Dawid & Skene
			
 
				+* Crowd Labeling Latent Dirichlet Allocation
			
 
				+
			
 
				+Pull requests are welcome.
			
--- a/labels/crowd_labeling/__init__.py
+++ b/labels/crowd_labeling/__init__.py
@@ -0,0 +1,4 @@
 
				+from .MV import MV
			
 
				+from .DS import DS
			
 
				+from .CLLDA import CLLDA, combine_cllda, concurrent_cllda
			
 
				+from . import logratio_transformations
			
--- a/labels/crowd_labeling/logratio_transformations.py
+++ b/labels/crowd_labeling/logratio_transformations.py
@@ -0,0 +1,205 @@
 
				+import numpy as np
			
 
				+
			
 
				+
			
 
				+# numpy functions
			
 
				+
			
 
				+# additive log-ratio cl_transform
			
 
				+def additive_log_ratio_transform(compositional):
			
 
				+    """Applies the additive log-ratio transform to compositional data."""
			
 
				+    compositional = compositional[:] + np.finfo(compositional.dtype).eps
			
 
				+    continuous = np.log(compositional[..., :-1] / compositional[..., -1, np.newaxis])
			
 
				+    return continuous
			
 
				+
			
 
				+
			
 
				+# inverse additive log-ratio cl_transform
			
 
				+def inverse_additive_log_ratio_transform(continuous):
			
 
				+    """Inverts the additive log-ratio transform, producing compositional data."""
			
 
				+    n = continuous.shape[0]
			
 
				+    compositional = np.hstack((np.exp(continuous), np.ones((n, 1))))
			
 
				+    compositional /= compositional.sum(axis=-1, keepdims=1)
			
 
				+    return compositional
			
 
				+
			
 
				+
			
 
				+# centered log-ratio cl_transform
			
 
				+def centered_log_ratio_transform(compositional):
			
 
				+    """Applies the centered log-ratio transform to compositional data."""
			
 
				+    continuous = np.log(compositional + np.finfo(compositional.dtype).eps)
			
 
				+    continuous -= continuous.mean(-1, keepdims=True)
			
 
				+    return continuous
			
 
				+
			
 
				+
			
 
				+# inverse centered log-ratio cl_transform
			
 
				+def inverse_centered_log_ratio_transform(continuous):
			
 
				+    """Inverts the centered log-ratio transform, producing compositional data."""
			
 
				+    compositional = np.exp(continuous)
			
 
				+    compositional /= compositional.sum(axis=-1, keepdims=1)
			
 
				+    return compositional
			
 
				+
			
 
				+
			
 
				+# isometric log-ratio cl_transform
			
 
				+def isometric_log_ratio_transform(compositional, projection_matrix):
			
 
				+    """Applies the isometric log-ratio transform to compositional data."""
			
 
				+    continuous = centered_log_ratio_transform(compositional)
			
 
				+    continuous = np.dot(continuous, projection_matrix)
			
 
				+    return continuous
			
 
				+
			
 
				+
			
 
				+# inverse isometric log-ratio cl_transform
			
 
				+def inverse_isometric_log_ratio_transform(continuous, projection_matrix):
			
 
				+    """Inverts the isometric log-ratio transform, producing compositional data."""
			
 
				+    continuous = np.dot(continuous, projection_matrix.T)
			
 
				+    compositional = inverse_centered_log_ratio_transform(continuous)
			
 
				+    return compositional
			
 
				+
			
 
				+
			
 
				+# isometric log-ratio cl_transform
			
 
				+def easy_isometric_log_ratio_transform(compositional):
			
 
				+    """Applies the isometric log-ratio transform to compositional data."""
			
 
				+    continuous = centered_log_ratio_transform(compositional)
			
 
				+    projection_matrix = make_projection_matrix(continuous.shape[1])
			
 
				+    continuous = np.dot(continuous, projection_matrix)
			
 
				+    return continuous
			
 
				+
			
 
				+
			
 
				+# inverse isometric log-ratio cl_transform
			
 
				+def easy_inverse_isometric_log_ratio_transform(continuous):
			
 
				+    """Inverts the isometric log-ratio transform, producing compositional data."""
			
 
				+    projection_matrix = make_projection_matrix(continuous.shape[1] + 1)
			
 
				+    continuous = np.dot(continuous, projection_matrix.T)
			
 
				+    compositional = inverse_centered_log_ratio_transform(continuous)
			
 
				+    return compositional
			
 
				+
			
 
				+
			
 
				+# projection matrix for isometric log-ratio cl_transform
			
 
				+def make_projection_matrix(dimension):
			
 
				+    """Creates the projection matrix for the  the isometric log-ratio transform."""
			
 
				+    projection_matrix = np.zeros((dimension, dimension - 1), dtype=np.float32)
			
 
				+    for it in range(dimension - 1):
			
 
				+        i = it + 1
			
 
				+        projection_matrix[:i, it] = 1. / i
			
 
				+        projection_matrix[i, it] = -1
			
 
				+        projection_matrix[i + 1:, it] = 0
			
 
				+        projection_matrix[:, it] *= np.sqrt(i / (i + 1.))
			
 
				+    return projection_matrix
			
 
				+
			
 
				+
			
 
				+# theano functions
			
 
				+eps = np.finfo(np.float32).eps
			
 
				+
			
 
				+
			
 
				+# additive log-ratio cl_transform
			
 
				+def theano_additive_log_ratio_transform(compositional):
			
 
				+    """Applies the additive log-ratio transform to compositional data."""
			
 
				+    from theano import tensor as T
			
 
				+    compositional = compositional[:] + eps
			
 
				+    continuous = T.log(compositional[..., :-1] /
			
 
				+                       compositional[..., -1].reshape(compositional.shape[:-1] + (1,)))
			
 
				+    return continuous
			
 
				+
			
 
				+
			
 
				+# inverse additive log-ratio cl_transform
			
 
				+def theano_inverse_additive_log_ratio_transform(continuous):
			
 
				+    """Inverts the additive log-ratio transform, producing compositional data."""
			
 
				+    from theano import tensor as T
			
 
				+    compositional = T.stack((T.exp(continuous), T.ones((continuous.shape[0], 1))), axis=continuous.ndim - 1)
			
 
				+    compositional /= compositional.sum(axis=-1, keepdims=1)
			
 
				+    return compositional
			
 
				+
			
 
				+
			
 
				+# centered log-ratio cl_transform
			
 
				+def theano_centered_log_ratio_transform(compositional):
			
 
				+    """Applies the centered log-ratio transform to compositional data."""
			
 
				+    from theano import tensor as T
			
 
				+    compositional = compositional[:] + eps
			
 
				+    continuous = T.log(compositional)
			
 
				+    continuous -= continuous.mean(-1, keepdims=True)
			
 
				+    return continuous
			
 
				+
			
 
				+
			
 
				+# inverse centered log-ratio cl_transform
			
 
				+def theano_inverse_centered_log_ratio_transform(continuous):
			
 
				+    """Inverts the centered log-ratio transform, producing compositional data."""
			
 
				+    from theano import tensor as T
			
 
				+    compositional = T.exp(continuous)
			
 
				+    compositional /= compositional.sum(axis=-1, keepdims=1)
			
 
				+    return compositional
			
 
				+
			
 
				+
			
 
				+# isometric log-ratio cl_transform
			
 
				+def theano_isometric_log_ratio_transform(compositional, projection_matrix):
			
 
				+    """Applies the isometric log-ratio transform to compositional data."""
			
 
				+    from theano import tensor as T
			
 
				+    continuous = theano_centered_log_ratio_transform(compositional)
			
 
				+    continuous = T.dot(continuous, projection_matrix)
			
 
				+    return continuous
			
 
				+
			
 
				+
			
 
				+# inverse isometric log-ratio cl_transform
			
 
				+def theano_inverse_isometric_log_ratio_transform(continuous, projection_matrix):
			
 
				+    """Inverts the isometric log-ratio transform, producing compositional data."""
			
 
				+    from theano import tensor as T
			
 
				+    continuous = T.dot(continuous, projection_matrix.T)
			
 
				+    compositional = theano_inverse_centered_log_ratio_transform(continuous)
			
 
				+    return compositional
			
 
				+
			
 
				+
			
 
				+# tensorflow functions
			
 
				+
			
 
				+# additive log-ratio cl_transform
			
 
				+def tf_additive_log_ratio_transform(compositional, name='alrt'):
			
 
				+    """Applies the additive log-ratio transform to compositional data."""
			
 
				+    import tensorflow as tf
			
 
				+    compositional = compositional + eps
			
 
				+    continuous = tf.log(compositional[..., :-1] /
			
 
				+                       compositional[..., -1].reshape(compositional.shape[:-1] + (1,)), name=name)
			
 
				+    return continuous
			
 
				+
			
 
				+
			
 
				+# inverse additive log-ratio cl_transform
			
 
				+def tf_inverse_additive_log_ratio_transform(continuous, name='ialrt'):
			
 
				+    """Inverts the additive log-ratio transform, producing compositional data."""
			
 
				+    import tensorflow as tf
			
 
				+    compositional = tf.stack((tf.exp(continuous), tf.ones((continuous.shape[0], 1))), axis=tf.get_shape(continuous).ndim - 1)
			
 
				+    compositional /= tf.reduce_sum(compositional, axis=-1, keep_dims=True, name=name)
			
 
				+    return compositional
			
 
				+
			
 
				+
			
 
				+# centered log-ratio cl_transform
			
 
				+def tf_centered_log_ratio_transform(compositional, name='clrt'):
			
 
				+    """Applies the centered log-ratio transform to compositional data."""
			
 
				+    import tensorflow as tf
			
 
				+    compositional = compositional[:] + eps
			
 
				+    continuous = tf.log(compositional)
			
 
				+    continuous -= tf.reduce_mean(continuous, axis=-1, keep_dims=True)
			
 
				+    if name:
			
 
				+        continuous = tf.identity(continuous, name=name)
			
 
				+    return continuous
			
 
				+
			
 
				+
			
 
				+# inverse centered log-ratio cl_transform
			
 
				+def tf_inverse_centered_log_ratio_transform(continuous, name='iclrt'):
			
 
				+    """Inverts the centered log-ratio transform, producing compositional data."""
			
 
				+    import tensorflow as tf
			
 
				+    compositional = tf.exp(continuous)
			
 
				+    compositional /= tf.reduce_sum(compositional, axis=-1, keep_dims=True)
			
 
				+    if name:
			
 
				+        compositional = tf.identity(compositional, name=name)
			
 
				+    return compositional
			
 
				+
			
 
				+
			
 
				+# isometric log-ratio cl_transform
			
 
				+def tf_isometric_log_ratio_transform(compositional, projection_matrix, name='ilrt'):
			
 
				+    """Applies the isometric log-ratio transform to compositional data."""
			
 
				+    import tensorflow as tf
			
 
				+    continuous = tf_centered_log_ratio_transform(compositional, name=None)
			
 
				+    continuous = tf.matmul(continuous, projection_matrix, name=name)
			
 
				+    return continuous
			
 
				+
			
 
				+
			
 
				+# inverse isometric log-ratio cl_transform
			
 
				+def tf_inverse_isometric_log_ratio_transform(continuous, projection_matrix, name='iilrt'):
			
 
				+    """Inverts the isometric log-ratio transform, producing compositional data."""
			
 
				+    import tensorflow as tf
			
 
				+    continuous = tf.matmul(continuous, projection_matrix, transpose_b=True)
			
 
				+    compositional = tf_inverse_centered_log_ratio_transform(continuous, name=None)
			
 
				+    return tf.identity(compositional, name=name)