MV.py 2.5 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677
  1. import numpy as np
  2. class MV:
  3. def __init__(self, votes=None, workers=None, instances=None, transform=None, classes=None):
  4. # data info
  5. self.V = len(votes)
  6. self.U = len(np.unique(workers))
  7. self.I = len(np.unique(instances))
  8. if classes is not None:
  9. self.C = len(classes)
  10. else:
  11. self.C = len(np.unique(votes))
  12. self.transform = transform
  13. self.eps = np.finfo(np.float64).eps
  14. # info to save
  15. self.labels = np.zeros((self.I, self.C))
  16. # estimate label means and covariances using ds
  17. self.mv(votes, workers, instances)
  18. # apply transform
  19. if transform == 'clr':
  20. def clr(self):
  21. continuous = np.log(self.labels + self.eps)
  22. continuous -= continuous.mean(1, keepdims=True)
  23. return continuous
  24. self.labels = clr(self)
  25. elif transform == 'alr':
  26. def alr(self):
  27. continuous = np.log(self.labels[:, :-1] / (self.labels[:, -1] + self.eps))
  28. return continuous
  29. self.labels = alr(self)
  30. elif transform == 'ilr':
  31. # make projection matrix
  32. self.projectionMatrix = np.zeros((self.C, self.C - 1), dtype=np.float32)
  33. for it in range(self.C - 1):
  34. i = it + 1
  35. self.projectionMatrix[:i, it] = 1. / i
  36. self.projectionMatrix[i, it] = -1
  37. self.projectionMatrix[i + 1:, it] = 0
  38. self.projectionMatrix[:, it] *= np.sqrt(i / (i + 1.))
  39. def ilr(self):
  40. continuous = np.log(self.labels + self.eps)
  41. continuous -= continuous.mean(1, keepdims=True)
  42. continuous = np.dot(continuous, self.projectionMatrix)
  43. return continuous
  44. self.labels = ilr(self)
  45. # DS optimization using EM
  46. def mv(self, votes, workers, instances):
  47. # vote weights
  48. temp = np.vstack((workers, instances)).T
  49. temp = np.ascontiguousarray(temp).view(np.dtype((np.void, temp.dtype.itemsize * temp.shape[1])))
  50. _, unique_counts = np.unique(temp, return_counts=True)
  51. weights = 1. / unique_counts[instances]
  52. # initial estimates
  53. for i in range(self.I):
  54. ind = instances == i
  55. for c in range(self.C):
  56. self.labels[i, c] = ((votes[ind] == c) * weights[ind]).sum()
  57. self.labels /= self.labels.sum(1, keepdims=True) + self.eps