spike_train_dissimilarity.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412
  1. # -*- coding: utf-8 -*-
  2. """
  3. In neuroscience one often wants to evaluate, how similar or dissimilar pairs
  4. or even large sets of spiketrains are. For this purpose various different
  5. spike train dissimilarity measures were introduced in the literature.
  6. They differ, e.g., by the properties of having the mathematical properties of
  7. a metric or by being time-scale dependent or not. Well known representatives
  8. of spike train dissimilarity measures are the Victor-Purpura distance and the
  9. Van Rossum distance implemented in this module, which both are metrics in the
  10. mathematical sense and time-scale dependent.
  11. :copyright: Copyright 2016 by the Elephant team, see AUTHORS.txt.
  12. :license: Modified BSD, see LICENSE.txt for details.
  13. """
  14. import quantities as pq
  15. import numpy as np
  16. import scipy as sp
  17. import elephant.kernels as kernels
  18. from neo.core import SpikeTrain
  19. # Problem of conversion from Python 2 to Python 3:
  20. # 'xrange' in Python 2 is 'range' in Python 3.
  21. try:
  22. xrange
  23. except NameError:
  24. xrange = range
  25. def _create_matrix_from_indexed_function(
  26. shape, func, symmetric_2d=False, **func_params):
  27. mat = np.empty(shape)
  28. if symmetric_2d:
  29. for i in xrange(shape[0]):
  30. for j in xrange(i, shape[1]):
  31. mat[i, j] = mat[j, i] = func(i, j, **func_params)
  32. else:
  33. for idx in np.ndindex(*shape):
  34. mat[idx] = func(*idx, **func_params)
  35. return mat
  36. def victor_purpura_dist(
  37. trains, q=1.0 * pq.Hz, kernel=None, sort=True, algorithm='fast'):
  38. """
  39. Calculates the Victor-Purpura's (VP) distance. It is often denoted as
  40. :math:`D^{\\text{spike}}[q]`.
  41. It is defined as the minimal cost of transforming spike train `a` into
  42. spike train `b` by using the following operations:
  43. * Inserting or deleting a spike (cost 1.0).
  44. * Shifting a spike from :math:`t` to :math:`t'` (cost :math:`q
  45. \\cdot |t - t'|`).
  46. A detailed description can be found in
  47. *Victor, J. D., & Purpura, K. P. (1996). Nature and precision of
  48. temporal coding in visual cortex: a metric-space analysis. Journal of
  49. Neurophysiology.*
  50. Given the average number of spikes :math:`n` in a spike train and
  51. :math:`N` spike trains the run-time complexity of this function is
  52. :math:`O(N^2 n^2)` and :math:`O(N^2 + n^2)` memory will be needed.
  53. Parameters
  54. ----------
  55. trains : Sequence of :class:`neo.core.SpikeTrain` objects of
  56. which the distance will be calculated pairwise.
  57. q: Quantity scalar
  58. Cost factor for spike shifts as inverse time scalar.
  59. Extreme values :math:`q=0` meaning no cost for any shift of
  60. spikes, or :math: `q=np.inf` meaning infinite cost for any
  61. spike shift and hence exclusion of spike shifts, are explicitly
  62. allowed. If `kernel` is not `None`, :math:`q` will be ignored.
  63. Default: 1.0 * pq.Hz
  64. kernel: :class:`.kernels.Kernel`
  65. Kernel to use in the calculation of the distance. If `kernel` is
  66. `None`, an unnormalized triangular kernel with standard deviation
  67. of :math:'2.0/(q * sqrt(6.0))' corresponding to a half width of
  68. :math:`2.0/q` will be used. Usage of the default value calculates
  69. the Victor-Purpura distance correctly with a triangular kernel of
  70. the suitable width. The choice of another kernel is enabled, but
  71. this leaves the framework of Victor-Purpura distances.
  72. Default: None
  73. sort: bool
  74. Spike trains with sorted spike times will be needed for the
  75. calculation. You can set `sort` to `False` if you know that your
  76. spike trains are already sorted to decrease calculation time.
  77. Default: True
  78. algorithm: string
  79. Allowed values are 'fast' or 'intuitive', each selecting an
  80. algorithm with which to calculate the pairwise Victor-Purpura distance.
  81. Typically 'fast' should be used, because while giving always the
  82. same result as 'intuitive', within the temporary structure of
  83. Python and add-on modules as numpy it is faster.
  84. Default: 'fast'
  85. Returns
  86. -------
  87. 2-D array
  88. Matrix containing the VP distance of all pairs of spike trains.
  89. Example
  90. -------
  91. import elephant.spike_train_dissimilarity_measures as stdm
  92. q = 1.0 / (10.0 * pq.ms)
  93. st_a = SpikeTrain([10, 20, 30], units='ms', t_stop= 1000.0)
  94. st_b = SpikeTrain([12, 24, 30], units='ms', t_stop= 1000.0)
  95. vp_f = stdm.victor_purpura_dist([st_a, st_b], q)[0, 1]
  96. vp_i = stdm.victor_purpura_dist(
  97. [st_a, st_b], q, algorithm='intuitive')[0, 1]
  98. """
  99. for train in trains:
  100. if not (isinstance(train, (pq.quantity.Quantity, SpikeTrain)) and
  101. train.dimensionality.simplified ==
  102. pq.Quantity(1, "s").dimensionality.simplified):
  103. raise TypeError("Spike trains must have a time unit.")
  104. if not (isinstance(q, pq.quantity.Quantity) and
  105. q.dimensionality.simplified ==
  106. pq.Quantity(1, "Hz").dimensionality.simplified):
  107. raise TypeError("q must be a rate quantity.")
  108. if kernel is None:
  109. if q == 0.0:
  110. num_spikes = np.atleast_2d([st.size for st in trains])
  111. return np.absolute(num_spikes.T - num_spikes)
  112. elif q == np.inf:
  113. num_spikes = np.atleast_2d([st.size for st in trains])
  114. return num_spikes.T + num_spikes
  115. else:
  116. kernel = kernels.TriangularKernel(2.0 / (np.sqrt(6.0) * q))
  117. if sort:
  118. trains = [np.sort(st.view(type=pq.Quantity)) for st in trains]
  119. def compute(i, j):
  120. if i == j:
  121. return 0.0
  122. else:
  123. if algorithm == 'fast':
  124. return _victor_purpura_dist_for_st_pair_fast(
  125. trains[i], trains[j], kernel)
  126. elif algorithm == 'intuitive':
  127. return _victor_purpura_dist_for_st_pair_intuitive(
  128. trains[i], trains[j], q)
  129. else:
  130. raise NameError("algorithm must be either 'fast' "
  131. "or 'intuitive'.")
  132. return _create_matrix_from_indexed_function(
  133. (len(trains), len(trains)), compute, kernel.is_symmetric())
  134. def _victor_purpura_dist_for_st_pair_fast(train_a, train_b, kernel):
  135. """
  136. The algorithm used is based on the one given in
  137. J. D. Victor and K. P. Purpura, Nature and precision of temporal
  138. coding in visual cortex: a metric-space analysis, Journal of
  139. Neurophysiology, 1996.
  140. It constructs a matrix G[i, j] containing the minimal cost when only
  141. considering the first i and j spikes of the spike trains. However, one
  142. never needs to store more than one row and one column at the same time
  143. for calculating the VP distance.
  144. cost[0, :cost.shape[1] - i] corresponds to G[i:, i]. In the same way
  145. cost[1, :cost.shape[1] - i] corresponds to G[i, i:].
  146. Moreover, the minimum operation on the costs of the three kind of actions
  147. (delete, insert or move spike) can be split up in two operations. One
  148. operation depends only on the already calculated costs and kernel
  149. evaluation (insertion of spike vs moving a spike). The other minimum
  150. depends on that result and the cost of deleting a spike. This operation
  151. always depends on the last calculated element in the cost array and
  152. corresponds to a recursive application of
  153. f(accumulated_min[i]) = min(f(accumulated_min[i-1]), accumulated_min[i])
  154. + 1. That '+1' can be excluded from this function if the summed value for
  155. all recursive applications is added upfront to accumulated_min.
  156. Afterwards it has to be removed again except one for the currently
  157. processed spike to get the real costs up to the evaluation of i.
  158. All currently calculated costs will be considered -1 because this saves
  159. a number of additions as in most cases the cost would be increased by
  160. exactly one (the only exception is shifting, but in that calculation is
  161. already the addition of a constant involved, thus leaving the number of
  162. operations the same). The increase by one will be added after calculating
  163. all minima by shifting decreasing_sequence by one when removing it from
  164. accumulated_min.
  165. Parameters
  166. ----------
  167. train_a, train_b : :class:`neo.core.SpikeTrain` objects of
  168. which the Victor-Purpura distance will be calculated pairwise.
  169. kernel: :class:`.kernels.Kernel`
  170. Kernel to use in the calculation of the distance.
  171. Returns
  172. -------
  173. float
  174. The Victor-Purpura distance of train_a and train_b
  175. """
  176. if train_a.size <= 0 or train_b.size <= 0:
  177. return max(train_a.size, train_b.size)
  178. if train_a.size < train_b.size:
  179. train_a, train_b = train_b, train_a
  180. min_dim, max_dim = train_b.size, train_a.size + 1
  181. cost = np.asfortranarray(np.tile(np.arange(float(max_dim)), (2, 1)))
  182. decreasing_sequence = np.asfortranarray(cost[:, ::-1])
  183. kern = kernel((np.atleast_2d(train_a).T.view(type=pq.Quantity) -
  184. train_b.view(type=pq.Quantity)))
  185. as_fortran = np.asfortranarray(
  186. ((np.sqrt(6.0) * kernel.sigma) * kern).simplified)
  187. k = 1 - 2 * as_fortran
  188. for i in xrange(min_dim):
  189. # determine G[i, i] == accumulated_min[:, 0]
  190. accumulated_min = cost[:, :-i - 1] + k[i:, i]
  191. accumulated_min[1, :train_b.size - i] = \
  192. cost[1, :train_b.size - i] + k[i, i:]
  193. accumulated_min = np.minimum(
  194. accumulated_min, # shift
  195. cost[:, 1:max_dim - i]) # insert
  196. acc_dim = accumulated_min.shape[1]
  197. # delete vs min(insert, shift)
  198. accumulated_min[:, 0] = min(cost[1, 1], accumulated_min[0, 0])
  199. # determine G[i, :] and G[:, i] by propagating minima.
  200. accumulated_min += decreasing_sequence[:, -acc_dim - 1:-1]
  201. accumulated_min = np.minimum.accumulate(accumulated_min, axis=1)
  202. cost[:, :acc_dim] = accumulated_min - decreasing_sequence[:, -acc_dim:]
  203. return cost[0, -min_dim - 1]
  204. def _victor_purpura_dist_for_st_pair_intuitive(
  205. train_a, train_b, q=1.0 * pq.Hz):
  206. """
  207. Function to calculate the Victor-Purpura distance between two spike trains
  208. described in *J. D. Victor and K. P. Purpura, Nature and precision of
  209. temporal coding in visual cortex: a metric-space analysis,
  210. J Neurophysiol,76(2):1310-1326, 1996*
  211. This function originates from the spikes-module in the signals-folder
  212. of the software package Neurotools. It represents the 'intuitive'
  213. implementation of the Victor-Purpura distance. With respect to calculation
  214. time at the moment this code is uncompetitive with the code implemented in
  215. the function _victor_purpura_dist_for_st_pair_fast. However, it is
  216. expected that the discrepancy in calculation time of the 2 algorithms
  217. decreases drastically if the temporary valid calculation speed difference
  218. of plain Python and Numpy routines would be removed when languages like
  219. cython could take over. The decision then has to be made between an
  220. intuitive and probably slightly slower algorithm versus a correct but
  221. strange optimal solution of an optimization problem under boundary
  222. conditions, where the boundary conditions would finally have been removed.
  223. Hence also this algoritm is kept here.
  224. Parameters
  225. ----------
  226. train_a, train_b : :class:`neo.core.SpikeTrain` objects of
  227. which the Victor-Purpura distance will be calculated pairwise.
  228. q : Quantity scalar of rate dimension
  229. The cost parameter.
  230. Default: 1.0 * pq.Hz
  231. Returns
  232. -------
  233. float
  234. The Victor-Purpura distance of train_a and train_b
  235. """
  236. nspk_a = len(train_a)
  237. nspk_b = len(train_b)
  238. scr = np.zeros((nspk_a+1, nspk_b+1))
  239. scr[:, 0] = xrange(0, nspk_a+1)
  240. scr[0, :] = xrange(0, nspk_b+1)
  241. if nspk_a > 0 and nspk_b > 0:
  242. for i in xrange(1, nspk_a+1):
  243. for j in xrange(1, nspk_b+1):
  244. scr[i, j] = min(scr[i-1, j]+1, scr[i, j-1]+1)
  245. scr[i, j] = min(scr[i, j], scr[i-1, j-1] + np.float64((
  246. q*abs(train_a[i-1]-train_b[j-1])).simplified))
  247. return scr[nspk_a, nspk_b]
  248. def van_rossum_dist(trains, tau=1.0 * pq.s, sort=True):
  249. """
  250. Calculates the van Rossum distance.
  251. It is defined as Euclidean distance of the spike trains convolved with a
  252. causal decaying exponential smoothing filter. A detailed description can
  253. be found in *Rossum, M. C. W. (2001). A novel spike distance. Neural
  254. Computation, 13(4), 751-763.* This implementation is normalized to yield
  255. a distance of 1.0 for the distance between an empty spike train and a
  256. spike train with a single spike. Divide the result by sqrt(2.0) to get
  257. the normalization used in the cited paper.
  258. Given :math:`N` spike trains with :math:`n` spikes on average the run-time
  259. complexity of this function is :math:`O(N^2 n)`.
  260. Parameters
  261. ----------
  262. trains : Sequence of :class:`neo.core.SpikeTrain` objects of
  263. which the van Rossum distance will be calculated pairwise.
  264. tau : Quantity scalar
  265. Decay rate of the exponential function as time scalar. Controls for
  266. which time scale the metric will be sensitive. This parameter will
  267. be ignored if `kernel` is not `None`. May also be :const:`scipy.inf`
  268. which will lead to only measuring differences in spike count.
  269. Default: 1.0 * pq.s
  270. sort : bool
  271. Spike trains with sorted spike times might be needed for the
  272. calculation. You can set `sort` to `False` if you know that your
  273. spike trains are already sorted to decrease calculation time.
  274. Default: True
  275. Returns
  276. -------
  277. 2-D array
  278. Matrix containing the van Rossum distances for all pairs of
  279. spike trains.
  280. Example
  281. -------
  282. import elephant.spike_train_dissimilarity_measures as stdm
  283. tau = 10.0 * pq.ms
  284. st_a = SpikeTrain([10, 20, 30], units='ms', t_stop= 1000.0)
  285. st_b = SpikeTrain([12, 24, 30], units='ms', t_stop= 1000.0)
  286. vr = stdm.van_rossum_dist([st_a, st_b], tau)[0, 1]
  287. """
  288. for train in trains:
  289. if not (isinstance(train, (pq.quantity.Quantity, SpikeTrain)) and
  290. train.dimensionality.simplified ==
  291. pq.Quantity(1, "s").dimensionality.simplified):
  292. raise TypeError("Spike trains must have a time unit.")
  293. if not (isinstance(tau, pq.quantity.Quantity) and
  294. tau.dimensionality.simplified ==
  295. pq.Quantity(1, "s").dimensionality.simplified):
  296. raise TypeError("tau must be a time quantity.")
  297. if tau == 0:
  298. spike_counts = [st.size for st in trains]
  299. return np.sqrt(spike_counts + np.atleast_2d(spike_counts).T)
  300. elif tau == np.inf:
  301. spike_counts = [st.size for st in trains]
  302. return np.absolute(spike_counts - np.atleast_2d(spike_counts).T)
  303. k_dist = _summed_dist_matrix(
  304. [st.view(type=pq.Quantity) for st in trains], tau, not sort)
  305. vr_dist = np.empty_like(k_dist)
  306. for i, j in np.ndindex(k_dist.shape):
  307. vr_dist[i, j] = (
  308. k_dist[i, i] + k_dist[j, j] - k_dist[i, j] - k_dist[j, i])
  309. return sp.sqrt(vr_dist)
  310. def _summed_dist_matrix(spiketrains, tau, presorted=False):
  311. # The algorithm underlying this implementation is described in
  312. # Houghton, C., & Kreuz, T. (2012). On the efficient calculation of van
  313. # Rossum distances. Network: Computation in Neural Systems, 23(1-2),
  314. # 48-58. We would like to remark that in this paper in formula (9) the
  315. # left side of the equation should be divided by two.
  316. #
  317. # Given N spiketrains with n entries on average the run-time complexity is
  318. # O(N^2 * n). O(N^2 + N * n) memory will be needed.
  319. if len(spiketrains) <= 0:
  320. return np.zeros((0, 0))
  321. if not presorted:
  322. spiketrains = [v.copy() for v in spiketrains]
  323. for v in spiketrains:
  324. v.sort()
  325. sizes = np.asarray([v.size for v in spiketrains])
  326. values = np.empty((len(spiketrains), max(1, sizes.max())))
  327. values.fill(np.nan)
  328. for i, v in enumerate(spiketrains):
  329. if v.size > 0:
  330. values[i, :v.size] = \
  331. (v / tau * pq.dimensionless).simplified
  332. exp_diffs = np.exp(values[:, :-1] - values[:, 1:])
  333. markage = np.zeros(values.shape)
  334. for u in xrange(len(spiketrains)):
  335. markage[u, 0] = 0
  336. for i in xrange(sizes[u] - 1):
  337. markage[u, i + 1] = (markage[u, i] + 1.0) * exp_diffs[u, i]
  338. # Same spiketrain terms
  339. D = np.empty((len(spiketrains), len(spiketrains)))
  340. D[np.diag_indices_from(D)] = sizes + 2.0 * np.sum(markage, axis=1)
  341. # Cross spiketrain terms
  342. for u in xrange(D.shape[0]):
  343. all_ks = np.searchsorted(values[u], values, 'left') - 1
  344. for v in xrange(u):
  345. js = np.searchsorted(values[v], values[u], 'right') - 1
  346. ks = all_ks[v]
  347. slice_j = np.s_[np.searchsorted(js, 0):sizes[u]]
  348. slice_k = np.s_[np.searchsorted(ks, 0):sizes[v]]
  349. D[u, v] = np.sum(
  350. np.exp(values[v][js[slice_j]] - values[u][slice_j]) *
  351. (1.0 + markage[v][js[slice_j]]))
  352. D[u, v] += np.sum(
  353. np.exp(values[u][ks[slice_k]] - values[v][slice_k]) *
  354. (1.0 + markage[u][ks[slice_k]]))
  355. D[v, u] = D[u, v]
  356. return D